From b332828c39326b1dca617f387dd15d12e81cd5f0 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:10 +0530 Subject: hw-breakpoints: prepare the code for Hardware Breakpoint interfaces The generic hardware breakpoint interface provides an abstraction of hardware breakpoints in front of specific arch implementations for both kernel and user side breakpoints. This includes execution breakpoints and read/write breakpoints, also known as "watchpoints". This patch introduces header files containing constants, structure definitions and declaration of functions used by the hardware breakpoint core and x86 specific code. It also introduces an array based storage for the debug-register values in 'struct thread_struct', while modifying all users of debugreg member in the structure. [ Impact: add headers for new hardware breakpoint interface ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e39..fc4685d 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -32,10 +32,10 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[0] = current->thread.debugreg[0]; + dump->u_debugreg[1] = current->thread.debugreg[1]; + dump->u_debugreg[2] = current->thread.debugreg[2]; + dump->u_debugreg[3] = current->thread.debugreg[3]; dump->u_debugreg[4] = 0; dump->u_debugreg[5] = 0; dump->u_debugreg[6] = current->thread.debugreg6; diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37..23439fb 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,6 +18,7 @@ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ #define DR_TRAP3 (0x8) /* db3 */ +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ @@ -49,6 +50,8 @@ #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ @@ -67,4 +70,30 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +/* + * HW breakpoint additions + */ +#ifdef __KERNEL__ + +/* For process management */ +extern void flush_thread_hw_breakpoint(struct task_struct *tsk); +extern int copy_thread_hw_breakpoint(struct task_struct *tsk, + struct task_struct *child, unsigned long clone_flags); + +/* For CPU management */ +extern void load_debug_registers(void); +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 0000000..1acb4d4 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,55 @@ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +#define __ARCH_HW_BREAKPOINT_H + +struct arch_hw_breakpoint { + char *name; /* Contains name of the symbol to set bkpt */ + unsigned long address; + u8 len; + u8 type; +}; + +#include +#include + +/* Available HW breakpoint length encodings */ +#define HW_BREAKPOINT_LEN_1 0x40 +#define HW_BREAKPOINT_LEN_2 0x44 +#define HW_BREAKPOINT_LEN_4 0x4c +#define HW_BREAKPOINT_LEN_EXECUTE 0x40 + +#ifdef CONFIG_X86_64 +#define HW_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define HW_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define HW_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define HW_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; +DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); +extern unsigned int hbp_user_refcount[HBP_NUM]; + +extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); +extern void arch_uninstall_thread_hw_breakpoint(void); +extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); +extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk); +extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); +extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); +extern void arch_update_kernel_hw_breakpoint(void *); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); +#endif /* __KERNEL__ */ +#endif /* _I386_HW_BREAKPOINT_H */ + diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0b2fab0..448b34a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -29,6 +29,7 @@ struct mm_struct; #include #include +#define HBP_NUM 4 /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -431,12 +432,11 @@ struct thread_struct { unsigned long fs; unsigned long gs; /* Hardware debugging registers: */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; + unsigned long debugreg[HBP_NUM]; unsigned long debugreg6; unsigned long debugreg7; + /* Hardware breakpoint info */ + struct hw_breakpoint *hbp[HBP_NUM]; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index fb5dfb8..291527c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -106,10 +106,10 @@ void flush_thread(void) clear_tsk_thread_flag(tsk, TIF_DEBUG); - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; + tsk->thread.debugreg[0] = 0; + tsk->thread.debugreg[1] = 0; + tsk->thread.debugreg[2] = 0; + tsk->thread.debugreg[3] = 0; tsk->thread.debugreg6 = 0; tsk->thread.debugreg7 = 0; memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); @@ -194,10 +194,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, update_debugctlmsr(next->debugctlmsr); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); - set_debugreg(next->debugreg2, 2); - set_debugreg(next->debugreg3, 3); + set_debugreg(next->debugreg[0], 0); + set_debugreg(next->debugreg[1], 1); + set_debugreg(next->debugreg[2], 2); + set_debugreg(next->debugreg[3], 3); /* no 4 and 5 */ set_debugreg(next->debugreg6, 6); set_debugreg(next->debugreg7, 7); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde..313be40 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -471,10 +471,10 @@ static int genregs_set(struct task_struct *target, static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) { switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; + case 0: return child->thread.debugreg[0]; + case 1: return child->thread.debugreg[1]; + case 2: return child->thread.debugreg[2]; + case 3: return child->thread.debugreg[3]; case 6: return child->thread.debugreg6; case 7: return child->thread.debugreg7; } @@ -493,10 +493,10 @@ static int ptrace_set_debugreg(struct task_struct *child, return -EIO; switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; + case 0: child->thread.debugreg[0] = data; break; + case 1: child->thread.debugreg[1] = data; break; + case 2: child->thread.debugreg[2] = data; break; + case 3: child->thread.debugreg[3] = data; break; case 6: if ((data & ~0xffffffffUL) != 0) diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index ce702c5..5199139 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -84,10 +84,10 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg7) { - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ set_debugreg(current->thread.debugreg6, 6); set_debugreg(current->thread.debugreg7, 7); diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index 5343540..1e3bdcc 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -163,10 +163,10 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg7){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ loaddebug(¤t->thread, 6); loaddebug(¤t->thread, 7); diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h new file mode 100644 index 0000000..9bf2d12 --- /dev/null +++ b/include/asm-generic/hw_breakpoint.h @@ -0,0 +1,139 @@ +#ifndef _ASM_GENERIC_HW_BREAKPOINT_H +#define _ASM_GENERIC_HW_BREAKPOINT_H + +#ifndef __ARCH_HW_BREAKPOINT_H +#error "Please don't include this file directly" +#endif + +#ifdef __KERNEL__ +#include +#include +#include + +/** + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint + * @triggered: callback invoked after target address access + * @info: arch-specific breakpoint info (address, length, and type) + * + * %hw_breakpoint structures are the kernel's way of representing + * hardware breakpoints. These are data breakpoints + * (also known as "watchpoints", triggered on data access), and the breakpoint's + * target address can be located in either kernel space or user space. + * + * The breakpoint's address, length, and type are highly + * architecture-specific. The values are encoded in the @info field; you + * specify them when registering the breakpoint. To examine the encoded + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared + * below. + * + * The address is specified as a regular kernel pointer (for kernel-space + * breakponts) or as an %__user pointer (for user-space breakpoints). + * With register_user_hw_breakpoint(), the address must refer to a + * location in user space. The breakpoint will be active only while the + * requested task is running. Conversely with + * register_kernel_hw_breakpoint(), the address must refer to a location + * in kernel space, and the breakpoint will be active on all CPUs + * regardless of the current task. + * + * The length is the breakpoint's extent in bytes, which is subject to + * certain limitations. include/asm/hw_breakpoint.h contains macros + * defining the available lengths for a specific architecture. Note that + * the address's alignment must match the length. The breakpoint will + * catch accesses to any byte in the range from address to address + + * (length - 1). + * + * The breakpoint's type indicates the sort of access that will cause it + * to trigger. Possible values may include: + * + * %HW_BREAKPOINT_RW (triggered on read or write access), + * %HW_BREAKPOINT_WRITE (triggered on write access), and + * %HW_BREAKPOINT_READ (triggered on read access). + * + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all + * possibilities are available on all architectures. Execute breakpoints + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. + * + * When a breakpoint gets hit, the @triggered callback is + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the + * processor registers. + * Data breakpoints occur after the memory access has taken place. + * Breakpoints are disabled during execution @triggered, to avoid + * recursive traps and allow unhindered access to breakpointed memory. + * + * This sample code sets a breakpoint on pid_max and registers a callback + * function for writes to that variable. Note that it is not portable + * as written, because not all architectures support HW_BREAKPOINT_LEN_4. + * + * ---------------------------------------------------------------------- + * + * #include + * + * struct hw_breakpoint my_bp; + * + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) + * { + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); + * dump_stack(); + * ............... + * } + * + * static struct hw_breakpoint my_bp; + * + * static int init_module(void) + * { + * ...................... + * my_bp.info.type = HW_BREAKPOINT_WRITE; + * my_bp.info.len = HW_BREAKPOINT_LEN_4; + * + * my_bp.installed = (void *)my_bp_installed; + * + * rc = register_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * static void cleanup_module(void) + * { + * ...................... + * unregister_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * ---------------------------------------------------------------------- + */ +struct hw_breakpoint { + void (*triggered)(struct hw_breakpoint *, struct pt_regs *); + struct arch_hw_breakpoint info; +}; + +/* + * len and type values are defined in include/asm/hw_breakpoint.h. + * Available values vary according to the architecture. On i386 the + * possibilities are: + * + * HW_BREAKPOINT_LEN_1 + * HW_BREAKPOINT_LEN_2 + * HW_BREAKPOINT_LEN_4 + * HW_BREAKPOINT_RW + * HW_BREAKPOINT_READ + * + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. + */ + +extern int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern int modify_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +/* + * Kernel breakpoints are not associated with any particular thread. + */ +extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); + +extern unsigned int hbp_kernel_pos; + +#endif /* __KERNEL__ */ +#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ -- cgit v0.10.2 From 62a038d34db26771756cf3689e36de638bedd2c4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:33 +0530 Subject: hw-breakpoints: introducing generic hardware breakpoint handler interfaces This patch introduces the generic Hardware Breakpoint interfaces for both user and kernel space requests. This core Api handles the hardware breakpoints through new helpers. It handles the user-space breakpoints and kernel breakpoints in front of arch implementation. One can choose kernel wide breakpoints using the following helpers and passing them a generic struct hw_breakpoint: - register_kernel_hw_breakpoint() - unregister_kernel_hw_breakpoint() - modify_kernel_hw_breakpoint() On the other side, you can choose per task breakpoints. - register_user_hw_breakpoint() - unregister_user_hw_breakpoint() - modify_user_hw_breakpoint() [ fweisbec@gmail.com: fix conflict against perfcounter ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/Kconfig b/arch/Kconfig index 78a35e9..1adf2d0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -112,3 +112,7 @@ config HAVE_DMA_API_DEBUG config HAVE_DEFAULT_NO_SPIN_MUTEXES bool + +config HAVE_HW_BREAKPOINT + bool + diff --git a/kernel/Makefile b/kernel/Makefile index a35eee3..18ad111 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o +obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c new file mode 100644 index 0000000..c1f64e6 --- /dev/null +++ b/kernel/hw_breakpoint.c @@ -0,0 +1,378 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) IBM Corporation, 2009 + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + * This file contains the arch-independent routines. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_X86 +#include +#endif +/* + * Spinlock that protects all (un)register operations over kernel/user-space + * breakpoint requests + */ +static DEFINE_SPINLOCK(hw_breakpoint_lock); + +/* Array of kernel-space breakpoint structures */ +struct hw_breakpoint *hbp_kernel[HBP_NUM]; + +/* + * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being + * modified but we need the older copy to handle any hbp exceptions. It will + * sync with hbp_kernel[] value after updation is done through IPIs. + */ +DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); + +/* + * Kernel breakpoints grow downwards, starting from HBP_NUM + * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for + * kernel-space request. We will initialise it here and not in an __init + * routine because load_debug_registers(), which uses this variable can be + * called very early during CPU initialisation. + */ +unsigned int hbp_kernel_pos = HBP_NUM; + +/* + * An array containing refcount of threads using a given bkpt register + * Accesses are synchronised by acquiring hw_breakpoint_lock + */ +unsigned int hbp_user_refcount[HBP_NUM]; + +/* + * Load the debug registers during startup of a CPU. + */ +void load_debug_registers(void) +{ + unsigned long flags; + struct task_struct *tsk = current; + + spin_lock_bh(&hw_breakpoint_lock); + + /* Prevent IPIs for new kernel breakpoint updates */ + local_irq_save(flags); + arch_update_kernel_hw_breakpoint(NULL); + local_irq_restore(flags); + + if (test_tsk_thread_flag(tsk, TIF_DEBUG)) + arch_install_thread_hw_breakpoint(tsk); + + spin_unlock_bh(&hw_breakpoint_lock); +} + +/* + * Erase all the hardware breakpoint info associated with a thread. + * + * If tsk != current then tsk must not be usable (for example, a + * child being cleaned up from a failed fork). + */ +void flush_thread_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *thread = &(tsk->thread); + + spin_lock_bh(&hw_breakpoint_lock); + + /* The thread no longer has any breakpoints associated with it */ + clear_tsk_thread_flag(tsk, TIF_DEBUG); + for (i = 0; i < HBP_NUM; i++) { + if (thread->hbp[i]) { + hbp_user_refcount[i]--; + kfree(thread->hbp[i]); + thread->hbp[i] = NULL; + } + } + + arch_flush_thread_hw_breakpoint(tsk); + + /* Actually uninstall the breakpoints if necessary */ + if (tsk == current) + arch_uninstall_thread_hw_breakpoint(); + spin_unlock_bh(&hw_breakpoint_lock); +} + +/* + * Copy the hardware breakpoint info from a thread to its cloned child. + */ +int copy_thread_hw_breakpoint(struct task_struct *tsk, + struct task_struct *child, unsigned long clone_flags) +{ + /* + * We will assume that breakpoint settings are not inherited + * and the child starts out with no debug registers set. + * But what about CLONE_PTRACE? + */ + clear_tsk_thread_flag(child, TIF_DEBUG); + + /* We will call flush routine since the debugregs are not inherited */ + arch_flush_thread_hw_breakpoint(child); + + return 0; +} + +static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, + struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + int rc; + + /* Do not overcommit. Fail if kernel has used the hbp registers */ + if (pos >= hbp_kernel_pos) + return -ENOSPC; + + rc = arch_validate_hwbkpt_settings(bp, tsk); + if (rc) + return rc; + + thread->hbp[pos] = bp; + hbp_user_refcount[pos]++; + + arch_update_user_hw_breakpoint(pos, tsk); + /* + * Does it need to be installed right now? + * Otherwise it will get installed the next time tsk runs + */ + if (tsk == current) + arch_install_thread_hw_breakpoint(tsk); + + return rc; +} + +/* + * Modify the address of a hbp register already in use by the task + * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() + */ +static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, + struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + + if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) + return -EINVAL; + + if (thread->hbp[pos] == NULL) + return -EINVAL; + + thread->hbp[pos] = bp; + /* + * 'pos' must be that of a hbp register already used by 'tsk' + * Otherwise arch_modify_user_hw_breakpoint() will fail + */ + arch_update_user_hw_breakpoint(pos, tsk); + + if (tsk == current) + arch_install_thread_hw_breakpoint(tsk); + + return 0; +} + +static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) +{ + hbp_user_refcount[pos]--; + tsk->thread.hbp[pos] = NULL; + + arch_update_user_hw_breakpoint(pos, tsk); + + if (tsk == current) + arch_install_thread_hw_breakpoint(tsk); +} + +/** + * register_user_hw_breakpoint - register a hardware breakpoint for user space + * @tsk: pointer to 'task_struct' of the process to which the address belongs + * @bp: the breakpoint structure to register + * + * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and + * @bp->triggered must be set properly before invocation + * + */ +int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + int i, rc = -ENOSPC; + + spin_lock_bh(&hw_breakpoint_lock); + + for (i = 0; i < hbp_kernel_pos; i++) { + if (!thread->hbp[i]) { + rc = __register_user_hw_breakpoint(i, tsk, bp); + break; + } + } + if (!rc) + set_tsk_thread_flag(tsk, TIF_DEBUG); + + spin_unlock_bh(&hw_breakpoint_lock); + return rc; +} +EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); + +/** + * modify_user_hw_breakpoint - modify a user-space hardware breakpoint + * @tsk: pointer to 'task_struct' of the process to which the address belongs + * @bp: the breakpoint structure to unregister + * + */ +int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + int i, ret = -ENOENT; + + spin_lock_bh(&hw_breakpoint_lock); + for (i = 0; i < hbp_kernel_pos; i++) { + if (bp == thread->hbp[i]) { + ret = __modify_user_hw_breakpoint(i, tsk, bp); + break; + } + } + spin_unlock_bh(&hw_breakpoint_lock); + return ret; +} +EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); + +/** + * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint + * @tsk: pointer to 'task_struct' of the process to which the address belongs + * @bp: the breakpoint structure to unregister + * + */ +void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp) +{ + struct thread_struct *thread = &(tsk->thread); + int i, pos = -1, hbp_counter = 0; + + spin_lock_bh(&hw_breakpoint_lock); + for (i = 0; i < hbp_kernel_pos; i++) { + if (thread->hbp[i]) + hbp_counter++; + if (bp == thread->hbp[i]) + pos = i; + } + if (pos >= 0) { + __unregister_user_hw_breakpoint(pos, tsk); + hbp_counter--; + } + if (!hbp_counter) + clear_tsk_thread_flag(tsk, TIF_DEBUG); + + spin_unlock_bh(&hw_breakpoint_lock); +} +EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); + +/** + * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space + * @bp: the breakpoint structure to register + * + * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and + * @bp->triggered must be set properly before invocation + * + */ +int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) +{ + int rc; + + rc = arch_validate_hwbkpt_settings(bp, NULL); + if (rc) + return rc; + + spin_lock_bh(&hw_breakpoint_lock); + + rc = -ENOSPC; + /* Check if we are over-committing */ + if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { + hbp_kernel_pos--; + hbp_kernel[hbp_kernel_pos] = bp; + on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); + rc = 0; + } + + spin_unlock_bh(&hw_breakpoint_lock); + return rc; +} +EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint); + +/** + * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space + * @bp: the breakpoint structure to unregister + * + * Uninstalls and unregisters @bp. + */ +void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) +{ + int i, j; + + spin_lock_bh(&hw_breakpoint_lock); + + /* Find the 'bp' in our list of breakpoints for kernel */ + for (i = hbp_kernel_pos; i < HBP_NUM; i++) + if (bp == hbp_kernel[i]) + break; + + /* Check if we did not find a match for 'bp'. If so return early */ + if (i == HBP_NUM) { + spin_unlock_bh(&hw_breakpoint_lock); + return; + } + + /* + * We'll shift the breakpoints one-level above to compact if + * unregistration creates a hole + */ + for (j = i; j > hbp_kernel_pos; j--) + hbp_kernel[j] = hbp_kernel[j-1]; + + hbp_kernel[hbp_kernel_pos] = NULL; + on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); + hbp_kernel_pos++; + + spin_unlock_bh(&hw_breakpoint_lock); +} +EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); + +static struct notifier_block hw_breakpoint_exceptions_nb = { + .notifier_call = hw_breakpoint_exceptions_notify, + /* we need to be notified first */ + .priority = 0x7fffffff +}; + +static int __init init_hw_breakpoint(void) +{ + return register_die_notifier(&hw_breakpoint_exceptions_nb); +} + +core_initcall(init_hw_breakpoint); -- cgit v0.10.2 From 0067f1297241ea567f2b22a455519752d70fcca9 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:57 +0530 Subject: hw-breakpoints: x86 architecture implementation of Hardware Breakpoint interfaces This patch introduces the arch-specific implementation of the generic hardware breakpoints in kernel/hw_breakpoint.c inside x86 specific directories. It contains functions which help to validate and serve requests using Hardware Breakpoint registers on x86 processors. [ fweisbec@gmail.com: fix conflict against kmemcheck ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885..3033375 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -46,6 +46,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_HW_BREAKPOINT config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 77df4d6..cbc7818 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o +obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 0000000..4867c9f --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c @@ -0,0 +1,382 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) 2009 IBM Corporation + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Unmasked kernel DR7 value */ +static unsigned long kdr7; + +/* + * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. + * Used to clear and verify the status of bits corresponding to DR0 - DR3 + */ +static const unsigned long dr7_masks[HBP_NUM] = { + 0x000f0003, /* LEN0, R/W0, G0, L0 */ + 0x00f0000c, /* LEN1, R/W1, G1, L1 */ + 0x0f000030, /* LEN2, R/W2, G2, L2 */ + 0xf00000c0 /* LEN3, R/W3, G3, L3 */ +}; + + +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + unsigned long bp_info; + + bp_info = (len | type) & 0xf; + bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | + DR_GLOBAL_SLOWDOWN; + return bp_info; +} + +void arch_update_kernel_hw_breakpoint(void *unused) +{ + struct hw_breakpoint *bp; + int i, cpu = get_cpu(); + unsigned long temp_kdr7 = 0; + + /* Don't allow debug exceptions while we update the registers */ + set_debugreg(0UL, 7); + + for (i = hbp_kernel_pos; i < HBP_NUM; i++) { + per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; + if (bp) { + temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); + set_debugreg(bp->info.address, i); + } + } + + /* No need to set DR6. Update the debug registers with kernel-space + * breakpoint values from kdr7 and user-space requests from the + * current process + */ + kdr7 = temp_kdr7; + set_debugreg(kdr7 | current->thread.debugreg7, 7); + put_cpu_no_resched(); +} + +/* + * Install the thread breakpoints in their debug registers. + */ +void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + + switch (hbp_kernel_pos) { + case 4: + set_debugreg(thread->debugreg[3], 3); + case 3: + set_debugreg(thread->debugreg[2], 2); + case 2: + set_debugreg(thread->debugreg[1], 1); + case 1: + set_debugreg(thread->debugreg[0], 0); + default: + break; + } + + /* No need to set DR6 */ + set_debugreg((kdr7 | thread->debugreg7), 7); +} + +/* + * Install the debug register values for just the kernel, no thread. + */ +void arch_uninstall_thread_hw_breakpoint() +{ + /* Clear the user-space portion of debugreg7 by setting only kdr7 */ + set_debugreg(kdr7, 7); + +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case HW_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case HW_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case HW_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; +#endif + } + return len_in_bytes; +} + +/* + * Check for virtual address in user space. + */ +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va <= TASK_SIZE - len); +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Store a breakpoint's encoded address, length, and type. + */ +static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +{ + /* + * User-space requests will always have the address field populated + * Symbol names from user-space are rejected + */ + if (tsk && bp->info.name) + return -EINVAL; + /* + * For kernel-addresses, either the address or symbol name can be + * specified. + */ + if (bp->info.name) + bp->info.address = (unsigned long) + kallsyms_lookup_name(bp->info.name); + if (bp->info.address) + return 0; + return -EINVAL; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk) +{ + unsigned int align; + int ret = -EINVAL; + + switch (bp->info.type) { + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + case HW_BREAKPOINT_EXECUTE: + if ((!arch_check_va_in_userspace(bp->info.address, + bp->info.len)) && + bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) + return ret; + break; + case HW_BREAKPOINT_WRITE: + break; + case HW_BREAKPOINT_RW: + break; + default: + return ret; + } + + switch (bp->info.len) { + case HW_BREAKPOINT_LEN_1: + align = 0; + break; + case HW_BREAKPOINT_LEN_2: + align = 1; + break; + case HW_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + align = 7; + break; +#endif + default: + return ret; + } + + if (bp->triggered) + ret = arch_store_info(bp, tsk); + + if (ret < 0) + return ret; + /* + * Check that the low-order bits of the address are appropriate + * for the alignment implied by len. + */ + if (bp->info.address & align) + return -EINVAL; + + /* Check that the virtual address is in the proper range */ + if (tsk) { + if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + return -EFAULT; + } else { + if (!arch_check_va_in_kernelspace(bp->info.address, + bp->info.len)) + return -EFAULT; + } + return 0; +} + +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + struct hw_breakpoint *bp = thread->hbp[pos]; + + thread->debugreg7 &= ~dr7_masks[pos]; + if (bp) { + thread->debugreg[pos] = bp->info.address; + thread->debugreg7 |= encode_dr7(pos, bp->info.len, + bp->info.type); + } else + thread->debugreg[pos] = 0; +} + +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *thread = &(tsk->thread); + + thread->debugreg7 = 0; + for (i = 0; i < HBP_NUM; i++) + thread->debugreg[i] = 0; +} + +/* + * Handle debug exception notifications. + * + * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. + * + * NOTIFY_DONE returned if one of the following conditions is true. + * i) When the causative address is from user-space and the exception + * is a valid one, i.e. not triggered as a result of lazy debug register + * switching + * ii) When there are more bits than trap set in DR6 register (such + * as BD, BS or BT) indicating that more than one debug condition is + * met and requires some more action in do_debug(). + * + * NOTIFY_STOP returned for all other cases + * + */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int i, cpu, rc = NOTIFY_STOP; + struct hw_breakpoint *bp; + /* The DR6 value is stored in args->err */ + unsigned long dr7, dr6 = args->err; + + /* Do an early return if no trap bits are set in DR6 */ + if ((dr6 & DR_TRAP_BITS) == 0) + return NOTIFY_DONE; + + /* Lazy debug register switching */ + if (!test_tsk_thread_flag(current, TIF_DEBUG)) + arch_uninstall_thread_hw_breakpoint(); + + get_debugreg(dr7, 7); + /* Disable breakpoints during exception handling */ + set_debugreg(0UL, 7); + /* + * Assert that local interrupts are disabled + * Reset the DRn bits in the virtualized register value. + * The ptrace trigger routine will add in whatever is needed. + */ + current->thread.debugreg6 &= ~DR_TRAP_BITS; + cpu = get_cpu(); + + /* Handle all the breakpoints that were triggered */ + for (i = 0; i < HBP_NUM; ++i) { + if (likely(!(dr6 & (DR_TRAP0 << i)))) + continue; + /* + * Find the corresponding hw_breakpoint structure and + * invoke its triggered callback. + */ + if (i >= hbp_kernel_pos) + bp = per_cpu(this_hbp_kernel[i], cpu); + else { + bp = current->thread.hbp[i]; + if (bp) + rc = NOTIFY_DONE; + } + /* + * bp can be NULL due to lazy debug register switching + * or due to the delay between updates of hbp_kernel_pos + * and this_hbp_kernel. + */ + if (!bp) + continue; + + (bp->triggered)(bp, args->regs); + } + if (dr6 & (~DR_TRAP_BITS)) + rc = NOTIFY_DONE; + + set_debugreg(dr7, 7); + put_cpu_no_resched(); + return rc; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + if (val != DIE_DEBUG) + return NOTIFY_DONE; + + return hw_breakpoint_handler(data); +} -- cgit v0.10.2 From 08d68323d1f0c34452e614263b212ca556dae47f Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:08 +0530 Subject: hw-breakpoints: modifying generic debug exception to use thread-specific debug registers This patch modifies the breakpoint exception handler code to use the new abstract debug register names. [ fweisbec@gmail.com: fix conflict against kmemcheck ] [ Impact: refactor and cleanup x86 debug exception handler ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d2883..de99132 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -529,73 +529,52 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long condition; + unsigned long dr6; int si_code; - get_debugreg(condition, 6); + get_debugreg(dr6, 6); + /* DR6 may or may not be cleared by the CPU */ + set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, + /* Store the virtualized DR6 value */ + tsk->thread.debugreg6 = dr6; + + if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code, SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; + if (regs->flags & X86_VM_MASK) { + handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, 1); + return; } -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). + * Single-stepping through system calls: ignore any exceptions in + * kernel space, but re-enable TF when returning to user mode. + * + * We already checked v86 mode above, so we can check for kernel mode + * by just checking the CPL of CS. */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; + if ((dr6 & DR_STEP) && !user_mode(regs)) { + tsk->thread.debugreg6 &= ~DR_STEP; + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); + si_code = get_si_code(tsk->thread.debugreg6); + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); - return; -#ifdef CONFIG_X86_32 -debug_vm86: - /* reenable preemption: handle_vm86_trap() might sleep */ - dec_preempt_count(); - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); return; } -- cgit v0.10.2 From 1e3500666f7c5daaadadb8431a2927cdbbdb7dd4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:26 +0530 Subject: hw-breakpoints: use wrapper routines around debug registers in processor related functions This patch enables the use of wrapper routines to access the debug/breakpoint registers on cpu management. The hardcoded debug registers save and restore operations for threads breakpoints are replaced by wrappers. And now that we handle the kernel breakpoints too, we also need to handle them on cpu hotplug operations. [ Impact: adapt new hardware breakpoint api to cpu hotplug ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 58d24ef..2b2652d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -326,6 +327,7 @@ notrace static void __cpuinit start_secondary(void *unused) setup_secondary_clock(); wmb(); + load_debug_registers(); cpu_idle(); } @@ -1250,6 +1252,7 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); + hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 5199139..2bc3b01 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -13,6 +13,7 @@ #include #include #include +#include static struct saved_context saved_context; @@ -48,6 +49,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr2 = read_cr2(); ctxt->cr3 = read_cr3(); ctxt->cr4 = read_cr4_safe(); + hw_breakpoint_disable(); } /* Needed by apm.c */ @@ -83,16 +85,7 @@ static void fix_processor_context(void) /* * Now maybe reload the debug registers */ - if (current->thread.debugreg7) { - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } - + load_debug_registers(); } static void __restore_processor_state(struct saved_context *ctxt) diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index 1e3bdcc..46866a1 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -16,6 +16,7 @@ #include #include #include +#include static void fix_processor_context(void); @@ -71,6 +72,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr3 = read_cr3(); ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); + hw_breakpoint_disable(); } void save_processor_state(void) @@ -162,13 +164,5 @@ static void fix_processor_context(void) /* * Now maybe reload the debug registers */ - if (current->thread.debugreg7){ - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); - } + load_debug_registers(); } -- cgit v0.10.2 From 66cb5917295958652ff6ba36d83f98f2379c46b4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:55 +0530 Subject: hw-breakpoints: use the new wrapper routines to access debug registers in process/thread code This patch enables the use of abstract debug registers in process-handling routines, according to the new hardware breakpoint Api. [ Impact: adapt thread breakpoints handling code to the new breakpoint Api ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 291527c..19a686c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -46,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + flush_thread_hw_breakpoint(tsk); WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } @@ -106,12 +110,8 @@ void flush_thread(void) clear_tsk_thread_flag(tsk, TIF_DEBUG); - tsk->thread.debugreg[0] = 0; - tsk->thread.debugreg[1] = 0; - tsk->thread.debugreg[2] = 0; - tsk->thread.debugreg[3] = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + flush_thread_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -193,16 +193,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg[0], 0); - set_debugreg(next->debugreg[1], 1); - set_debugreg(next->debugreg[2], 2); - set_debugreg(next->debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg6, 6); - set_debugreg(next->debugreg7, 7); - } - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b5e4bfe..297ffff 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -61,6 +61,8 @@ #include #include #include +#include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -265,7 +267,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); + p->thread.io_bitmap_ptr = NULL; tsk = current; + err = -ENOMEM; + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) + goto out; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); @@ -285,10 +293,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); +out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + if (err) + flush_thread_hw_breakpoint(p); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; @@ -427,6 +438,23 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); + /* + * There's a problem with moving the arch_install_thread_hw_breakpoint() + * call before current is updated. Suppose a kernel breakpoint is + * triggered in between the two, the hw-breakpoint handler will see that + * the 'current' task does not have TIF_DEBUG flag set and will think it + * is leftover from an old task (lazy switching) and will erase it. Then + * until the next context switch, no user-breakpoints will be installed. + * + * The real problem is that it's impossible to update both current and + * physical debug registers at the same instant, so there will always be + * a window in which they disagree and a breakpoint might get triggered. + * Since we use lazy switching, we are forced to assume that a + * disagreement means that current is correct and the exception is due + * to lazy debug register switching. + */ + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) + arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5a1a1de..f7b276d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -55,6 +55,8 @@ #include #include #include +#include +#include asmlinkage extern void ret_from_fork(void); @@ -248,6 +250,8 @@ void release_thread(struct task_struct *dead_task) BUG(); } } + if (unlikely(dead_task->thread.debugreg7)) + flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -303,12 +307,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; + p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); savesegment(fs, p->thread.fsindex); savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + err = -ENOMEM; + if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) + if (copy_thread_hw_breakpoint(me, p, clone_flags)) + goto out; + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -347,6 +357,9 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + if (err) + flush_thread_hw_breakpoint(p); + return err; } @@ -492,6 +505,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + /* + * There's a problem with moving the arch_install_thread_hw_breakpoint() + * call before current is updated. Suppose a kernel breakpoint is + * triggered in between the two, the hw-breakpoint handler will see that + * the 'current' task does not have TIF_DEBUG flag set and will think it + * is leftover from an old task (lazy switching) and will erase it. Then + * until the next context switch, no user-breakpoints will be installed. + * + * The real problem is that it's impossible to update both current and + * physical debug registers at the same instant, so there will always be + * a window in which they disagree and a breakpoint might get triggered. + * Since we use lazy switching, we are forced to assume that a + * disagreement means that current is correct and the exception is due + * to lazy debug register switching. + */ + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) + arch_install_thread_hw_breakpoint(next_p); + return prev_p; } -- cgit v0.10.2 From da0cdc14f5f7e0faee6b2393fefed056cdb17146 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:45:03 +0530 Subject: hw-breakpoints: modify signal handling code to refrain from re-enabling HW Breakpoints This patch disables re-enabling of Hardware Breakpoint registers through the signal handling code. This is now done during from hw_breakpoint_handler(). Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1442516..f33d2e0 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -800,15 +800,6 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* - * Re-enable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if (current->thread.debugreg7) - set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* -- cgit v0.10.2 From 72f674d203cd230426437cdcf7dd6f681dad8b0d Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:45:48 +0530 Subject: hw-breakpoints: modify Ptrace routines to access breakpoint registers This patch modifies the ptrace code to use the new wrapper routines around the debug/breakpoint registers. [ Impact: adapt x86 ptrace to the new breakpoint Api ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Signed-off-by: Maneesh Soni Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 313be40..b457f78 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -136,11 +137,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -265,15 +261,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE_MAX - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -464,95 +451,159 @@ static int genregs_set(struct task_struct *target, } /* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, + unsigned *type) { - switch (n) { - case 0: return child->thread.debugreg[0]; - case 1: return child->thread.debugreg[1]; - case 2: return child->thread.debugreg[2]; - case 3: return child->thread.debugreg[3]; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; - } - return 0; + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); + + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) { + struct thread_struct *thread = &(current->thread); int i; - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < hbp_kernel_pos; i++) + /* + * We will check bp->info.address against the address stored in + * thread's hbp structure and not debugreg[i]. This is to ensure + * that the corresponding bit for 'i' in DR7 register is enabled + */ + if (bp->info.address == thread->hbp[i]->info.address) + break; - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + thread->debugreg6 |= (DR_TRAP0 << i); +} - switch (n) { - case 0: child->thread.debugreg[0] = data; break; - case 1: child->thread.debugreg[1] = data; break; - case 2: child->thread.debugreg[2] = data; break; - case 3: child->thread.debugreg[3] = data; break; +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7 = thread->debugreg7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct hw_breakpoint *bp; + + data &= ~DR_CONTROL_RESERVED; +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->hbp[i]; + + if (!enabled) { + if (bp) { + /* Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + unregister_user_hw_breakpoint(tsk, bp); + kfree(bp); + } + continue; + } + if (!bp) { + rc = -ENOMEM; + bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); + if (bp) { + bp->info.address = thread->debugreg[i]; + bp->triggered = ptrace_triggered; + bp->info.len = len; + bp->info.type = type; + rc = register_user_hw_breakpoint(tsk, bp); + if (rc) + kfree(bp); + } + } else + rc = modify_user_hw_breakpoint(tsk, bp); + if (rc) + break; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. + */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; + + if (n < HBP_NUM) + val = thread->debugreg[n]; + else if (n == 6) + val = thread->debugreg6; + else if (n == 7) + val = thread->debugreg7; + return val; +} - case 7: - /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) - */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; +/* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + tsk->thread.debugreg6 = val; + goto ret_path; } + if (n < HBP_NUM) { + if (thread->hbp[n]) { + if (arch_check_va_in_userspace(val, + thread->hbp[n]->info.len) == 0) { + rc = -EIO; + goto ret_path; + } + thread->hbp[n]->info.address = val; + } + thread->debugreg[n] = val; + } + /* All that's left is DR7 */ + if (n == 7) + rc = ptrace_write_dr7(tsk, val); - return 0; +ret_path: + return rc; } /* -- cgit v0.10.2 From 17f557e5b5d43a2af66c969f6560ac7105020672 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:46:03 +0530 Subject: hw-breakpoints: cleanup HW Breakpoint registers before kexec This patch disables Hardware breakpoints before doing a 'kexec' on the machine so that the cpu doesn't keep debug registers values which would be out of sync for the new image. Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d..c843f84 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -25,6 +25,7 @@ #include #include #include +#include static void set_idt(void *newidt, __u16 limit) { @@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf2..4a8bb82 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,6 +18,7 @@ #include #include #include +#include static int init_one_level2_page(struct kimage *image, pgd_t *pgd, unsigned long addr) @@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC -- cgit v0.10.2 From 432039933a16b8227b7b267f46ac1c1b9b3adf14 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:46:20 +0530 Subject: hw-breakpoints: sample HW breakpoint over kernel data address This patch introduces a sample kernel module to demonstrate the use of Hardware Breakpoint feature. It places a breakpoint over the kernel variable 'pid_max' to monitor all write operations and emits a function-backtrace when done. Signed-off-by: K.Prasad Signed-off-by: Frederic Weisbecker diff --git a/samples/Kconfig b/samples/Kconfig index b75d28c..8458516 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -45,5 +45,11 @@ config SAMPLE_KRETPROBES default m depends on SAMPLE_KPROBES && KRETPROBES +config SAMPLE_HW_BREAKPOINT + tristate "Build kernel hardware breakpoint examples -- loadable module only" + depends on HAVE_HW_BREAKPOINT && m + help + This builds kernel hardware breakpoint example modules. + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 13e4b47..42e1755 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,3 +1,4 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ trace_events/ +obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ \ + trace_events/ hw_breakpoint/ diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile new file mode 100644 index 0000000..0f5c31c --- /dev/null +++ b/samples/hw_breakpoint/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c new file mode 100644 index 0000000..9cbdbb8 --- /dev/null +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -0,0 +1,83 @@ +/* + * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * usage: insmod data_breakpoint.ko ksym= + * + * This file is a kernel module that places a breakpoint over ksym_name kernel + * variable using Hardware Breakpoint register. The corresponding handler which + * prints a backtrace is invoked everytime a write operation is performed on + * that variable. + * + * Copyright (C) IBM Corporation, 2009 + */ +#include /* Needed by all modules */ +#include /* Needed for KERN_INFO */ +#include /* Needed for the macros */ + +#include + +struct hw_breakpoint sample_hbp; + +static char ksym_name[KSYM_NAME_LEN] = "pid_max"; +module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); +MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" + " write operations on the kernel symbol"); + +void sample_hbp_handler(struct hw_breakpoint *temp, struct pt_regs + *temp_regs) +{ + printk(KERN_INFO "%s value is changed\n", ksym_name); + dump_stack(); + printk(KERN_INFO "Dump stack from sample_hbp_handler\n"); +} + +static int __init hw_break_module_init(void) +{ + int ret; + +#ifdef CONFIG_X86 + sample_hbp.info.name = ksym_name; + sample_hbp.info.type = HW_BREAKPOINT_WRITE; + sample_hbp.info.len = HW_BREAKPOINT_LEN_4; +#endif /* CONFIG_X86 */ + + sample_hbp.triggered = (void *)sample_hbp_handler; + + ret = register_kernel_hw_breakpoint(&sample_hbp); + + if (ret < 0) { + printk(KERN_INFO "Breakpoint registration failed\n"); + return ret; + } else + printk(KERN_INFO "HW Breakpoint for %s write installed\n", + ksym_name); + + return 0; +} + +static void __exit hw_break_module_exit(void) +{ + unregister_kernel_hw_breakpoint(&sample_hbp); + printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); +} + +module_init(hw_break_module_init); +module_exit(hw_break_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("K.Prasad"); +MODULE_DESCRIPTION("ksym breakpoint"); -- cgit v0.10.2 From 0722db015c246204044299eae3b02d18d3ca4faf Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:46:40 +0530 Subject: hw-breakpoints: ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces This patch adds an ftrace plugin to detect and profile memory access over kernel variables. It uses HW Breakpoint interfaces to 'watch memory addresses. Signed-off-by: K.Prasad Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a508b9d..d7f01e6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -314,6 +314,27 @@ config POWER_TRACER power management decisions, specifically the C-state and P-state behavior. +config KSYM_TRACER + bool "Trace read and write access on kernel memory locations" + depends on HAVE_HW_BREAKPOINT + select TRACING + help + This tracer helps find read and write operations on any given kernel + symbol i.e. /proc/kallsyms. + +config PROFILE_KSYM_TRACER + bool "Profile all kernel memory accesses on 'watched' variables" + depends on KSYM_TRACER + help + This tracer profiles kernel accesses on variables watched through the + ksym tracer ftrace plugin. Depending upon the hardware, all read + and write operations on kernel variables can be monitored for + accesses. + + The results will be displayed in: + /debugfs/tracing/profile_ksym + + Say N if unsure. config STACK_TRACER bool "Trace max stack" diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 06b8585..658aace 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -51,5 +51,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o +obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6e735d4..7d5cc37 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -15,6 +15,10 @@ #include #include +#ifdef CONFIG_KSYM_TRACER +#include +#endif + enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -40,6 +44,7 @@ enum trace_type { TRACE_KMEM_FREE, TRACE_POWER, TRACE_BLK, + TRACE_KSYM, __TRACE_LAST_TYPE, }; @@ -207,6 +212,21 @@ struct syscall_trace_exit { unsigned long ret; }; +#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" +extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); + +struct trace_ksym { + struct trace_entry ent; + struct hw_breakpoint *ksym_hbp; + unsigned long ksym_addr; + unsigned long ip; +#ifdef CONFIG_PROFILE_KSYM_TRACER + unsigned long counter; +#endif + struct hlist_node ksym_hlist; + char ksym_name[KSYM_NAME_LEN]; + char p_name[TASK_COMM_LEN]; +}; /* * trace_flag_type is an enumeration that holds different @@ -323,6 +343,7 @@ extern void __ftrace_bad_type(void); TRACE_SYSCALL_ENTER); \ IF_ASSIGN(var, ent, struct syscall_trace_exit, \ TRACE_SYSCALL_EXIT); \ + IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \ __ftrace_bad_type(); \ } while (0) @@ -540,6 +561,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_hw_branches(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_ksym(struct tracer *trace, + struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c new file mode 100644 index 0000000..11c74f6 --- /dev/null +++ b/kernel/trace/trace_ksym.c @@ -0,0 +1,525 @@ +/* + * trace_ksym.c - Kernel Symbol Tracer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include +#include +#include +#include +#include +#include + +#include "trace_output.h" +#include "trace_stat.h" +#include "trace.h" + +/* For now, let us restrict the no. of symbols traced simultaneously to number + * of available hardware breakpoint registers. + */ +#define KSYM_TRACER_MAX HBP_NUM + +#define KSYM_TRACER_OP_LEN 3 /* rw- */ +#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1) + +static struct trace_array *ksym_trace_array; + +static unsigned int ksym_filter_entry_count; +static unsigned int ksym_tracing_enabled; + +static HLIST_HEAD(ksym_filter_head); + +#ifdef CONFIG_PROFILE_KSYM_TRACER + +#define MAX_UL_INT 0xffffffff + +static DEFINE_MUTEX(ksym_tracer_mutex); + +void ksym_collect_stats(unsigned long hbp_hit_addr) +{ + struct hlist_node *node; + struct trace_ksym *entry; + + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { + if ((entry->ksym_addr == hbp_hit_addr) && + (entry->counter <= MAX_UL_INT)) { + entry->counter++; + break; + } + } + rcu_read_unlock(); +} +#endif /* CONFIG_PROFILE_KSYM_TRACER */ + +void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) +{ + struct ring_buffer_event *event; + struct trace_array *tr; + struct trace_ksym *entry; + int pc; + + if (!ksym_tracing_enabled) + return; + + tr = ksym_trace_array; + pc = preempt_count(); + + event = trace_buffer_lock_reserve(tr, TRACE_KSYM, + sizeof(*entry), 0, pc); + if (!event) + return; + + entry = ring_buffer_event_data(event); + strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); + entry->ksym_hbp = hbp; + entry->ip = instruction_pointer(regs); + strlcpy(entry->p_name, current->comm, TASK_COMM_LEN); +#ifdef CONFIG_PROFILE_KSYM_TRACER + ksym_collect_stats(hbp->info.address); +#endif /* CONFIG_PROFILE_KSYM_TRACER */ + + trace_buffer_unlock_commit(tr, event, 0, pc); +} + +/* Valid access types are represented as + * + * rw- : Set Read/Write Access Breakpoint + * -w- : Set Write Access Breakpoint + * --- : Clear Breakpoints + * --x : Set Execution Break points (Not available yet) + * + */ +static int ksym_trace_get_access_type(char *access_str) +{ + int pos, access = 0; + + for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) { + switch (access_str[pos]) { + case 'r': + access += (pos == 0) ? 4 : -1; + break; + case 'w': + access += (pos == 1) ? 2 : -1; + break; + case '-': + break; + default: + return -EINVAL; + } + } + + switch (access) { + case 6: + access = HW_BREAKPOINT_RW; + break; + case 2: + access = HW_BREAKPOINT_WRITE; + break; + case 0: + access = 0; + } + + return access; +} + +/* + * There can be several possible malformed requests and we attempt to capture + * all of them. We enumerate some of the rules + * 1. We will not allow kernel symbols with ':' since it is used as a delimiter. + * i.e. multiple ':' symbols disallowed. Possible uses are of the form + * ::. + * 2. No delimiter symbol ':' in the input string + * 3. Spurious operator symbols or symbols not in their respective positions + * 4. :--- i.e. clear breakpoint request when ksym_name not in file + * 5. Kernel symbol not a part of /proc/kallsyms + * 6. Duplicate requests + */ +static int parse_ksym_trace_str(char *input_string, char **ksymname, + unsigned long *addr) +{ + char *delimiter = ":"; + int ret; + + ret = -EINVAL; + *ksymname = strsep(&input_string, delimiter); + *addr = kallsyms_lookup_name(*ksymname); + + /* Check for malformed request: (2), (1) and (5) */ + if ((!input_string) || + (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) || + (*addr == 0)) + goto return_code; + ret = ksym_trace_get_access_type(input_string); + +return_code: + return ret; +} + +int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) +{ + struct trace_ksym *entry; + int ret; + + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" + " new requests for tracing can be accepted now.\n", + KSYM_TRACER_MAX); + return -ENOSPC; + } + + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); + if (!entry->ksym_hbp) { + kfree(entry); + return -ENOMEM; + } + + entry->ksym_hbp->info.name = ksymname; + entry->ksym_hbp->info.type = op; + entry->ksym_addr = entry->ksym_hbp->info.address = addr; +#ifdef CONFIG_X86 + entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; +#endif + entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; + + ret = register_kernel_hw_breakpoint(entry->ksym_hbp); + if (ret < 0) { + printk(KERN_INFO "ksym_tracer request failed. Try again" + " later!!\n"); + kfree(entry->ksym_hbp); + kfree(entry); + return -EAGAIN; + } + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); + ksym_filter_entry_count++; + + return 0; +} + +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *ppos) +{ + struct trace_ksym *entry; + struct hlist_node *node; + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX]; + ssize_t ret, cnt = 0; + + mutex_lock(&ksym_tracer_mutex); + + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:", + entry->ksym_hbp->info.name); + if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, + "-w-\n"); + else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, + "rw-\n"); + } + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf)); + mutex_unlock(&ksym_tracer_mutex); + + return ret; +} + +static ssize_t ksym_trace_filter_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *ppos) +{ + struct trace_ksym *entry; + struct hlist_node *node; + char *input_string, *ksymname = NULL; + unsigned long ksym_addr = 0; + int ret, op, changed = 0; + + /* Ignore echo "" > ksym_trace_filter */ + if (count == 0) + return 0; + + input_string = kzalloc(count, GFP_KERNEL); + if (!input_string) + return -ENOMEM; + + if (copy_from_user(input_string, buffer, count)) { + kfree(input_string); + return -EFAULT; + } + + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); + if (ret < 0) { + kfree(input_string); + return ret; + } + + mutex_lock(&ksym_tracer_mutex); + + ret = -EINVAL; + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { + if (entry->ksym_addr == ksym_addr) { + /* Check for malformed request: (6) */ + if (entry->ksym_hbp->info.type != op) + changed = 1; + else + goto err_ret; + break; + } + } + if (changed) { + unregister_kernel_hw_breakpoint(entry->ksym_hbp); + entry->ksym_hbp->info.type = op; + if (op > 0) { + ret = register_kernel_hw_breakpoint(entry->ksym_hbp); + if (ret == 0) { + ret = count; + goto unlock_ret_path; + } + } + ksym_filter_entry_count--; + hlist_del_rcu(&(entry->ksym_hlist)); + synchronize_rcu(); + kfree(entry->ksym_hbp); + kfree(entry); + ret = count; + goto err_ret; + } else { + /* Check for malformed request: (4) */ + if (op == 0) + goto err_ret; + ret = process_new_ksym_entry(ksymname, op, ksym_addr); + if (ret) + goto err_ret; + } + ret = count; + goto unlock_ret_path; + +err_ret: + kfree(input_string); + +unlock_ret_path: + mutex_unlock(&ksym_tracer_mutex); + return ret; +} + +static const struct file_operations ksym_tracing_fops = { + .open = tracing_open_generic, + .read = ksym_trace_filter_read, + .write = ksym_trace_filter_write, +}; + +static void ksym_trace_reset(struct trace_array *tr) +{ + struct trace_ksym *entry; + struct hlist_node *node, *node1; + + ksym_tracing_enabled = 0; + + mutex_lock(&ksym_tracer_mutex); + hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, + ksym_hlist) { + unregister_kernel_hw_breakpoint(entry->ksym_hbp); + ksym_filter_entry_count--; + hlist_del_rcu(&(entry->ksym_hlist)); + synchronize_rcu(); + /* Free the 'input_string' only if reset + * after startup self-test + */ +#ifdef CONFIG_FTRACE_SELFTEST + if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY, + strlen(KSYM_SELFTEST_ENTRY)) != 0) +#endif /* CONFIG_FTRACE_SELFTEST*/ + kfree(entry->ksym_hbp->info.name); + kfree(entry->ksym_hbp); + kfree(entry); + } + mutex_unlock(&ksym_tracer_mutex); +} + +static int ksym_trace_init(struct trace_array *tr) +{ + int cpu, ret = 0; + + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + ksym_tracing_enabled = 1; + ksym_trace_array = tr; + + return ret; +} + +static void ksym_trace_print_header(struct seq_file *m) +{ + + seq_puts(m, + "# TASK-PID CPU# Symbol Type " + "Function \n"); + seq_puts(m, + "# | | | | " + "| \n"); +} + +static enum print_line_t ksym_trace_output(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct trace_ksym *field; + char str[KSYM_SYMBOL_LEN]; + int ret; + + if (entry->type != TRACE_KSYM) + return TRACE_TYPE_UNHANDLED; + + trace_assign_type(field, entry); + + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name, + entry->pid, iter->cpu, field->ksym_name); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + switch (field->ksym_hbp->info.type) { + case HW_BREAKPOINT_WRITE: + ret = trace_seq_printf(s, " W "); + break; + case HW_BREAKPOINT_RW: + ret = trace_seq_printf(s, " RW "); + break; + default: + return TRACE_TYPE_PARTIAL_LINE; + } + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + sprint_symbol(str, field->ip); + ret = trace_seq_printf(s, "%-20s\n", str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; +} + +struct tracer ksym_tracer __read_mostly = +{ + .name = "ksym_tracer", + .init = ksym_trace_init, + .reset = ksym_trace_reset, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_ksym, +#endif + .print_header = ksym_trace_print_header, + .print_line = ksym_trace_output +}; + +__init static int init_ksym_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + ksym_filter_entry_count = 0; + + entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, + NULL, &ksym_tracing_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'ksym_trace_filter' file\n"); + + return register_tracer(&ksym_tracer); +} +device_initcall(init_ksym_trace); + + +#ifdef CONFIG_PROFILE_KSYM_TRACER +static int ksym_tracer_stat_headers(struct seq_file *m) +{ + seq_printf(m, " Access type "); + seq_printf(m, " Symbol Counter \n"); + return 0; +} + +static int ksym_tracer_stat_show(struct seq_file *m, void *v) +{ + struct hlist_node *stat = v; + struct trace_ksym *entry; + int access_type = 0; + char fn_name[KSYM_NAME_LEN]; + + entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); + + if (entry->ksym_hbp) + access_type = entry->ksym_hbp->info.type; + + switch (access_type) { + case HW_BREAKPOINT_WRITE: + seq_printf(m, " W "); + break; + case HW_BREAKPOINT_RW: + seq_printf(m, " RW "); + break; + default: + seq_printf(m, " NA "); + } + + if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) + seq_printf(m, " %s ", fn_name); + else + seq_printf(m, " "); + + seq_printf(m, "%15lu\n", entry->counter); + return 0; +} + +static void *ksym_tracer_stat_start(struct tracer_stat *trace) +{ + return &(ksym_filter_head.first); +} + +static void * +ksym_tracer_stat_next(void *v, int idx) +{ + struct hlist_node *stat = v; + + return stat->next; +} + +static struct tracer_stat ksym_tracer_stats = { + .name = "ksym_tracer", + .stat_start = ksym_tracer_stat_start, + .stat_next = ksym_tracer_stat_next, + .stat_headers = ksym_tracer_stat_headers, + .stat_show = ksym_tracer_stat_show +}; + +__init static int ksym_tracer_stat_init(void) +{ + int ret; + + ret = register_stat_tracer(&ksym_tracer_stats); + if (ret) { + printk(KERN_WARNING "Warning: could not register " + "ksym tracer stats\n"); + return 1; + } + + return 0; +} +fs_initcall(ksym_tracer_stat_init); +#endif /* CONFIG_PROFILE_KSYM_TRACER */ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 00dd648..71f2edb 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_GRAPH_ENT: case TRACE_GRAPH_RET: case TRACE_HW_BRANCHES: + case TRACE_KSYM: return 1; } return 0; @@ -807,3 +808,55 @@ trace_selftest_startup_hw_branches(struct tracer *trace, return ret; } #endif /* CONFIG_HW_BRANCH_TRACER */ + +#ifdef CONFIG_KSYM_TRACER +static int ksym_selftest_dummy; + +int +trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) +{ + unsigned long count; + int ret; + + /* start the tracing */ + ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + + ksym_selftest_dummy = 0; + /* Register the read-write tracing request */ + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, + (unsigned long)(&ksym_selftest_dummy)); + + if (ret < 0) { + printk(KERN_CONT "ksym_trace read-write startup test failed\n"); + goto ret_path; + } + /* Perform a read and a write operation over the dummy variable to + * trigger the tracer + */ + if (ksym_selftest_dummy == 0) + ksym_selftest_dummy++; + + /* stop the tracing. */ + tracing_stop(); + /* check the trace buffer */ + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + tracing_start(); + + /* read & write operations - one each is performed on the dummy variable + * triggering two entries in the trace buffer + */ + if (!ret && count != 2) { + printk(KERN_CONT "Ksym tracer startup test failed"); + ret = -1; + } + +ret_path: + return ret; +} +#endif /* CONFIG_KSYM_TRACER */ + -- cgit v0.10.2 From 62edab9056a6cf0c9207339c8892c923a5217e45 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:47:06 +0530 Subject: hw-breakpoints: reset bits in dr6 after the corresponding exception is handled This patch resets the bit in dr6 after the corresponding exception is handled in code, so that we keep a clean track of the current virtual debug status register. [ Impact: keep track of breakpoints triggering completion ] Signed-off-by: K.Prasad Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 4867c9f..6945147 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -314,8 +314,12 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct hw_breakpoint *bp; - /* The DR6 value is stored in args->err */ - unsigned long dr7, dr6 = args->err; + unsigned long dr7, dr6; + unsigned long *dr6_p; + + /* The DR6 value is pointed by args->err */ + dr6_p = (unsigned long *)ERR_PTR(args->err); + dr6 = *dr6_p; /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) @@ -352,6 +356,11 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) rc = NOTIFY_DONE; } /* + * Reset the 'i'th TRAP bit in dr6 to denote completion of + * exception handling + */ + (*dr6_p) &= ~(DR_TRAP0 << i); + /* * bp can be NULL due to lazy debug register switching * or due to the delay between updates of hbp_kernel_pos * and this_hbp_kernel. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b1f4dff..f820b73 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) "resuming...\n"); kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs); + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; return NOTIFY_STOP; } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d..b5b1848 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -54,6 +54,7 @@ #include #include #include +#include void jprobe_return_end(void); @@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(args->regs)) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; ret = NOTIFY_STOP; + } break; case DIE_GPF: /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index de99132..124a4d5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -545,8 +545,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; - if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code, - SIGTRAP) == NOTIFY_STOP) + if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd7..11a4ad4 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) struct die_args *arg = args; if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) + if (post_kmmio_handler(arg->err, arg->regs) == 1) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; return NOTIFY_STOP; + } return NOTIFY_DONE; } -- cgit v0.10.2 From 73874005cd8800440be4299bd095387fff4b90ac Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 3 Jun 2009 01:43:38 +0200 Subject: hw-breakpoints: fix undeclared ksym_tracer_mutex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ksym_tracer_mutex is declared inside an #ifdef CONFIG_PROFILE_KSYM_TRACER section. This makes it unavailable for the hardware breakpoint tracer if it is configured without the breakpoint profiler. This patch fixes the following build error: kernel/trace/trace_ksym.c: In function ‘ksym_trace_filter_read’: kernel/trace/trace_ksym.c:226: erreur: ‘ksym_tracer_mutex’ undeclared (first use in this function) kernel/trace/trace_ksym.c:226: erreur: (Each undeclared identifier is reported only once kernel/trace/trace_ksym.c:226: erreur: for each function it appears in.) kernel/trace/trace_ksym.c: In function ‘ksym_trace_filter_write’: kernel/trace/trace_ksym.c:273: erreur: ‘ksym_tracer_mutex’ undeclared (first use in this function) kernel/trace/trace_ksym.c: In function ‘ksym_trace_reset’: kernel/trace/trace_ksym.c:335: erreur: ‘ksym_tracer_mutex’ undeclared (first use in this function) make[1]: *** [kernel/trace/trace_ksym.o] Erreur 1 [ Impact: fix a build error ] Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 11c74f6..eef97e7 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -44,12 +44,12 @@ static unsigned int ksym_tracing_enabled; static HLIST_HEAD(ksym_filter_head); +static DEFINE_MUTEX(ksym_tracer_mutex); + #ifdef CONFIG_PROFILE_KSYM_TRACER #define MAX_UL_INT 0xffffffff -static DEFINE_MUTEX(ksym_tracer_mutex); - void ksym_collect_stats(unsigned long hbp_hit_addr) { struct hlist_node *node; -- cgit v0.10.2 From 4555835b707d5c778ee1c9076670bc99b1eeaf61 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 17 Jun 2009 14:44:19 +0530 Subject: x86: hw_breakpoint.c arch_check_va_in_kernelspace and hw_breakpoint_handler should be static arch_check_va_in_kernelspace() and hw_breakpoint_handler() is used only by same file so it should be static. Also fixed non-ANSI function declaration of function 'arch_uninstall_thread_hw_breakpoint' Fixed following sparse warnings : arch/x86/kernel/hw_breakpoint.c:124:42: warning: non-ANSI function declaration of function 'arch_uninstall_thread_hw_breakpoint' arch/x86/kernel/hw_breakpoint.c:169:5: warning: symbol 'arch_check_va_in_kernelspace' was not declared. Should it be static? arch/x86/kernel/hw_breakpoint.c:313:15: warning: symbol 'hw_breakpoint_handler' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Cc: Alan Stern Cc: "K.Prasad" Cc: Frederic Weisbecker LKML-Reference: <1245230059.2662.4.camel@ht.satnam> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 51d9595..9316a9d 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -121,7 +121,7 @@ void arch_install_thread_hw_breakpoint(struct task_struct *tsk) /* * Install the debug register values for just the kernel, no thread. */ -void arch_uninstall_thread_hw_breakpoint() +void arch_uninstall_thread_hw_breakpoint(void) { /* Clear the user-space portion of debugreg7 by setting only kdr7 */ set_debugreg(kdr7, 7); @@ -166,7 +166,7 @@ int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) /* * Check for virtual address in kernel space. */ -int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) { unsigned int len; @@ -310,7 +310,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) * NOTIFY_STOP returned for all other cases * */ -int __kprobes hw_breakpoint_handler(struct die_args *args) +static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct hw_breakpoint *bp; -- cgit v0.10.2 From 9d22b536609abf0d64648f99518676ea58245e3b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 19:52:30 +0530 Subject: x86: Mark ptrace_get_debugreg() as static This sparse warning: arch/x86/kernel/ptrace.c:560:15: warning: symbol 'ptrace_get_debugreg' was not declared. Should it be static? triggers because ptrace_get_debugreg() is global but is only used in a single .c file. change ptrace_get_debugreg() to static to fix that - this also addresses the sparse warning. Signed-off-by: Jaswinder Singh Rajput Cc: Steven Rostedt LKML-Reference: <1246458150.6940.19.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b457f78..cabdabc 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -557,7 +557,7 @@ restore: /* * Handle PTRACE_PEEKUSR calls for the debug register area. */ -unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; -- cgit v0.10.2 From db59504d89db1462a5281fb55b1d962cb74a398f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:52:36 +0800 Subject: ksym_tracer: Extract trace entry from struct trace_ksym struct trace_ksym is used as an entry in hbp list, and is also used as trace_entry stored in ring buffer. This is not necessary and is a waste of memory in ring buffer. There is also a bug that dereferencing field->ksym_hbp in ksym_trace_output() can be invalid. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E2A4.4050007@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 7d5cc37..ff1ef41 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -215,17 +215,12 @@ struct syscall_trace_exit { #define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); -struct trace_ksym { +struct ksym_trace_entry { struct trace_entry ent; - struct hw_breakpoint *ksym_hbp; - unsigned long ksym_addr; unsigned long ip; -#ifdef CONFIG_PROFILE_KSYM_TRACER - unsigned long counter; -#endif - struct hlist_node ksym_hlist; + unsigned char type; char ksym_name[KSYM_NAME_LEN]; - char p_name[TASK_COMM_LEN]; + char cmd[TASK_COMM_LEN]; }; /* @@ -343,7 +338,7 @@ extern void __ftrace_bad_type(void); TRACE_SYSCALL_ENTER); \ IF_ASSIGN(var, ent, struct syscall_trace_exit, \ TRACE_SYSCALL_EXIT); \ - IF_ASSIGN(var, ent, struct trace_ksym, TRACE_KSYM); \ + IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index eef97e7..085ff05 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -37,6 +37,15 @@ #define KSYM_TRACER_OP_LEN 3 /* rw- */ #define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1) +struct trace_ksym { + struct hw_breakpoint *ksym_hbp; + unsigned long ksym_addr; +#ifdef CONFIG_PROFILE_KSYM_TRACER + unsigned long counter; +#endif + struct hlist_node ksym_hlist; +}; + static struct trace_array *ksym_trace_array; static unsigned int ksym_filter_entry_count; @@ -71,7 +80,7 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) { struct ring_buffer_event *event; struct trace_array *tr; - struct trace_ksym *entry; + struct ksym_trace_entry *entry; int pc; if (!ksym_tracing_enabled) @@ -85,11 +94,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) if (!event) return; - entry = ring_buffer_event_data(event); + entry = ring_buffer_event_data(event); + entry->ip = instruction_pointer(regs); + entry->type = hbp->info.type; strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); - entry->ksym_hbp = hbp; - entry->ip = instruction_pointer(regs); - strlcpy(entry->p_name, current->comm, TASK_COMM_LEN); + strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); + #ifdef CONFIG_PROFILE_KSYM_TRACER ksym_collect_stats(hbp->info.address); #endif /* CONFIG_PROFILE_KSYM_TRACER */ @@ -380,7 +390,7 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) { struct trace_entry *entry = iter->ent; struct trace_seq *s = &iter->seq; - struct trace_ksym *field; + struct ksym_trace_entry *field; char str[KSYM_SYMBOL_LEN]; int ret; @@ -389,12 +399,12 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->p_name, + ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->cmd, entry->pid, iter->cpu, field->ksym_name); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - switch (field->ksym_hbp->info.type) { + switch (field->type) { case HW_BREAKPOINT_WRITE: ret = trace_seq_printf(s, " W "); break; -- cgit v0.10.2 From be9742e6cb107fe1d77db7a081ea4eb25e79e1ad Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:52:52 +0800 Subject: ksym_tracer: Rewrite ksym_trace_filter_read() Reading ksym_trace_filter gave me some arbitrary characters, when it should show nothing. It's because buf is not initialized when there's no filter. Also reduce stack usage by about 512 bytes. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E2B4.6030706@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 085ff05..b6710d3 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -35,7 +35,6 @@ #define KSYM_TRACER_MAX HBP_NUM #define KSYM_TRACER_OP_LEN 3 /* rw- */ -#define KSYM_FILTER_ENTRY_LEN (KSYM_NAME_LEN + KSYM_TRACER_OP_LEN + 1) struct trace_ksym { struct hw_breakpoint *ksym_hbp; @@ -230,25 +229,33 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, { struct trace_ksym *entry; struct hlist_node *node; - char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX]; - ssize_t ret, cnt = 0; + struct trace_seq *s; + ssize_t cnt = 0; + int ret; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + trace_seq_init(s); mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:", - entry->ksym_hbp->info.name); + ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) - cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, - "-w-\n"); + ret = trace_seq_puts(s, "-w-\n"); else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) - cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, - "rw-\n"); + ret = trace_seq_puts(s, "rw-\n"); + WARN_ON_ONCE(!ret); } - ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf)); + + cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); + mutex_unlock(&ksym_tracer_mutex); - return ret; + kfree(s); + + return cnt; } static ssize_t ksym_trace_filter_write(struct file *file, -- cgit v0.10.2 From f088e5471297cc78d7465e1fd997cb1a91a48019 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:53:18 +0800 Subject: ksym_tracer: Fix validation of access type # echo 'pid_max:rw-' > ksym_trace_filter # cat ksym_trace_filter pid_max:rw- # echo 'pid_max:ww-' > ksym_trace_filter (should return -EINVAL) # cat ksym_trace_filter (but it ended up removing filter entry) Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E2CE.6080409@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index b6710d3..9556009 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -114,24 +114,22 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) * --x : Set Execution Break points (Not available yet) * */ -static int ksym_trace_get_access_type(char *access_str) +static int ksym_trace_get_access_type(char *str) { - int pos, access = 0; + int access = 0; - for (pos = 0; pos < KSYM_TRACER_OP_LEN; pos++) { - switch (access_str[pos]) { - case 'r': - access += (pos == 0) ? 4 : -1; - break; - case 'w': - access += (pos == 1) ? 2 : -1; - break; - case '-': - break; - default: - return -EINVAL; - } - } + if (str[0] == 'r') + access += 4; + else if (str[0] != '-') + return -EINVAL; + + if (str[1] == 'w') + access += 2; + else if (str[1] != '-') + return -EINVAL; + + if (str[2] != '-') + return -EINVAL; switch (access) { case 6: @@ -140,8 +138,6 @@ static int ksym_trace_get_access_type(char *access_str) case 2: access = HW_BREAKPOINT_WRITE; break; - case 0: - access = 0; } return access; -- cgit v0.10.2 From 92cf9f8f7e89c6bdbb1a724f879b8b18fc0dfe0f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:53:47 +0800 Subject: ksym_tracer: Fix validation of length of access type Don't take newline into account, otherwise: # echo 'pid_max:-w-' > ksym_trace_filter # echo -n 'pid_max:rw-' > ksym_trace_filter bash: echo: write error: Invalid argument Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E2EB.9070503@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 9556009..72fcb46 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -158,21 +158,21 @@ static int ksym_trace_get_access_type(char *str) static int parse_ksym_trace_str(char *input_string, char **ksymname, unsigned long *addr) { - char *delimiter = ":"; int ret; - ret = -EINVAL; - *ksymname = strsep(&input_string, delimiter); + strstrip(input_string); + + *ksymname = strsep(&input_string, ":"); *addr = kallsyms_lookup_name(*ksymname); /* Check for malformed request: (2), (1) and (5) */ if ((!input_string) || - (strlen(input_string) != (KSYM_TRACER_OP_LEN + 1)) || - (*addr == 0)) - goto return_code; + (strlen(input_string) != KSYM_TRACER_OP_LEN) || + (*addr == 0)) + return -EINVAL;; + ret = ksym_trace_get_access_type(input_string); -return_code: return ret; } -- cgit v0.10.2 From 011ed56853e07e30653d6f1bfddc56b396218664 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:54:08 +0800 Subject: ksym_tracer: NIL-terminate user input filter Make sure the user input string is NULL-terminated. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E300.7020601@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 72fcb46..8cbed5a 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -264,11 +264,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, unsigned long ksym_addr = 0; int ret, op, changed = 0; - /* Ignore echo "" > ksym_trace_filter */ - if (count == 0) - return 0; - - input_string = kzalloc(count, GFP_KERNEL); + input_string = kzalloc(count + 1, GFP_KERNEL); if (!input_string) return -ENOMEM; @@ -276,6 +272,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, kfree(input_string); return -EFAULT; } + input_string[count] = '\0'; ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); if (ret < 0) { -- cgit v0.10.2 From 0d109c8f70eab8b9f693bd5caea23012394e4876 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:54:28 +0800 Subject: ksym_tracer: Report error when failed to re-register hbp When access type is changed, the hw break point will be unregistered and then be registered again with new access type. But the registration may fail, in this case, -errno should be returned. Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E314.7070004@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 8cbed5a..891e3b8 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -302,13 +302,13 @@ static ssize_t ksym_trace_filter_write(struct file *file, ret = count; goto unlock_ret_path; } - } + } else + ret = count; ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); kfree(entry->ksym_hbp); kfree(entry); - ret = count; goto err_ret; } else { /* Check for malformed request: (4) */ -- cgit v0.10.2 From 558df6c8f74ac4a0b9026ef85b0028280f364d96 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:54:48 +0800 Subject: ksym_tracer: Fix memory leak - When remove a filter, we leak entry->ksym_hbp->info.name. - With CONFIG_FTRAC_SELFTEST enabled, we leak ->info.name: # echo ksym_tracer > current_tracer # echo 'ksym_selftest_dummy:rw-' > ksym_trace_filter # echo nop > current_tracer Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E328.8010200@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 891e3b8..7d349d3 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -179,7 +179,7 @@ static int parse_ksym_trace_str(char *input_string, char **ksymname, int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) { struct trace_ksym *entry; - int ret; + int ret = -ENOMEM; if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No" @@ -193,12 +193,13 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) return -ENOMEM; entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (!entry->ksym_hbp) { - kfree(entry); - return -ENOMEM; - } + if (!entry->ksym_hbp) + goto err; + + entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); + if (!entry->ksym_hbp->info.name) + goto err; - entry->ksym_hbp->info.name = ksymname; entry->ksym_hbp->info.type = op; entry->ksym_addr = entry->ksym_hbp->info.address = addr; #ifdef CONFIG_X86 @@ -210,14 +211,18 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (ret < 0) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); - kfree(entry->ksym_hbp); - kfree(entry); - return -EAGAIN; + ret = -EAGAIN; + goto err; } hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); ksym_filter_entry_count++; - return 0; +err: + if (entry->ksym_hbp) + kfree(entry->ksym_hbp->info.name); + kfree(entry->ksym_hbp); + kfree(entry); + return ret; } static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, @@ -289,7 +294,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, if (entry->ksym_hbp->info.type != op) changed = 1; else - goto err_ret; + goto out; break; } } @@ -298,34 +303,29 @@ static ssize_t ksym_trace_filter_write(struct file *file, entry->ksym_hbp->info.type = op; if (op > 0) { ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret == 0) { - ret = count; - goto unlock_ret_path; - } - } else - ret = count; + if (ret == 0) + goto out; + } ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); + kfree(entry->ksym_hbp->info.name); kfree(entry->ksym_hbp); kfree(entry); - goto err_ret; + goto out; } else { /* Check for malformed request: (4) */ if (op == 0) - goto err_ret; + goto out; ret = process_new_ksym_entry(ksymname, op, ksym_addr); - if (ret) - goto err_ret; } - ret = count; - goto unlock_ret_path; +out: + mutex_unlock(&ksym_tracer_mutex); -err_ret: kfree(input_string); -unlock_ret_path: - mutex_unlock(&ksym_tracer_mutex); + if (!ret) + ret = count; return ret; } @@ -349,14 +349,7 @@ static void ksym_trace_reset(struct trace_array *tr) ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - /* Free the 'input_string' only if reset - * after startup self-test - */ -#ifdef CONFIG_FTRACE_SELFTEST - if (strncmp(entry->ksym_hbp->info.name, KSYM_SELFTEST_ENTRY, - strlen(KSYM_SELFTEST_ENTRY)) != 0) -#endif /* CONFIG_FTRACE_SELFTEST*/ - kfree(entry->ksym_hbp->info.name); + kfree(entry->ksym_hbp->info.name); kfree(entry->ksym_hbp); kfree(entry); } -- cgit v0.10.2 From 9d7e934408b52cd53dd85270eb36941a6a318cc5 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 7 Jul 2009 13:55:18 +0800 Subject: ksym_tracer: Fix the output of stat tracing - make ksym_tracer_stat_start() return head->first instead of &head->first - make the output properly aligned Before: Access type Symbol Counter NA 0 RW pid_max 0 After: Access Type Symbol Counter ----------- ------ ------- RW pid_max 0 Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Cc: "K.Prasad" Cc: Alan Stern Cc: Steven Rostedt LKML-Reference: <4A52E346.5050608@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 7d349d3..1256a6e 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -453,8 +453,10 @@ device_initcall(init_ksym_trace); #ifdef CONFIG_PROFILE_KSYM_TRACER static int ksym_tracer_stat_headers(struct seq_file *m) { - seq_printf(m, " Access type "); - seq_printf(m, " Symbol Counter \n"); + seq_puts(m, " Access Type "); + seq_puts(m, " Symbol Counter\n"); + seq_puts(m, " ----------- "); + seq_puts(m, " ------ -------\n"); return 0; } @@ -472,27 +474,27 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) switch (access_type) { case HW_BREAKPOINT_WRITE: - seq_printf(m, " W "); + seq_puts(m, " W "); break; case HW_BREAKPOINT_RW: - seq_printf(m, " RW "); + seq_puts(m, " RW "); break; default: - seq_printf(m, " NA "); + seq_puts(m, " NA "); } if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) - seq_printf(m, " %s ", fn_name); + seq_printf(m, " %-36s", fn_name); else - seq_printf(m, " "); + seq_printf(m, " %-36s", ""); + seq_printf(m, " %15lu\n", entry->counter); - seq_printf(m, "%15lu\n", entry->counter); return 0; } static void *ksym_tracer_stat_start(struct tracer_stat *trace) { - return &(ksym_filter_head.first); + return ksym_filter_head.first; } static void * -- cgit v0.10.2 From d857ace143df3884954887e1899a65831ca72ece Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 22 Jul 2009 11:21:31 +0800 Subject: tracing/ksym_tracer: fix the output of ksym tracer Fix the output format of ksym tracer, make it properly aligned Befor patch: # tracer: ksym_tracer # # TASK-PID CPU# Symbol Type Function # | | | | | bash 1378 1 ksym_tracer_mutex W mutex_lock+0x11/0x27 bash 1378 1 ksym_filter_head W process_new_ksym_entry+0xd2/0x10c bash 1378 1 ksym_tracer_mutex W mutex_unlock+0x12/0x1b cat 1429 0 ksym_tracer_mutex W mutex_lock+0x11/0x27 After patch: # tracer: ksym_tracer # # TASK-PID CPU# Symbol Type Function # | | | | | cat-1423 [000] ksym_tracer_mutex RW mutex_lock+0x11/0x27 cat-1423 [000] ksym_filter_head RW ksym_trace_filter_read+0x6e/0x10d cat-1423 [000] ksym_tracer_mutex RW mutex_unlock+0x12/0x1b cat-1423 [000] ksym_tracer_mutex RW mutex_lock+0x11/0x27 cat-1423 [000] ksym_filter_head RW ksym_trace_filter_read+0x6e/0x10d cat-1423 [000] ksym_tracer_mutex RW mutex_unlock+0x12/0x1b Signed-off-by: Xiao Guangrong LKML-Reference: <4A6685BB.2090809@cn.fujitsu.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 1256a6e..fbf3a8e 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -370,13 +370,12 @@ static int ksym_trace_init(struct trace_array *tr) static void ksym_trace_print_header(struct seq_file *m) { - seq_puts(m, - "# TASK-PID CPU# Symbol Type " - "Function \n"); + "# TASK-PID CPU# Symbol " + "Type Function\n"); seq_puts(m, - "# | | | | " - "| \n"); + "# | | | " + " | |\n"); } static enum print_line_t ksym_trace_output(struct trace_iterator *iter) @@ -392,7 +391,7 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%-15s %-5d %-3d %-20s ", field->cmd, + ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, entry->pid, iter->cpu, field->ksym_name); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -412,7 +411,7 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) return TRACE_TYPE_PARTIAL_LINE; sprint_symbol(str, field->ip); - ret = trace_seq_printf(s, "%-20s\n", str); + ret = trace_seq_printf(s, "%s\n", str); if (!ret) return TRACE_TYPE_PARTIAL_LINE; -- cgit v0.10.2 From 8e068542a8d9efec55126284d2f5cb32f003d507 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 22 Jul 2009 11:23:41 +0800 Subject: tracing/ksym_tracer: fix write operation of ksym_trace_filter This patch fix 2 bugs: - fix the return value of ksym_trace_filter_write() when we want to clear symbol in ksym_trace_filter file for example: # echo global_trace:rw- > /debug/tracing/ksym_trace_filter # echo global_trace:--- > /debug/tracing/ksym_trace_filter -bash: echo: write error: Invalid argument # cat /debug/tracing/ksym_trace_filter # We want to clear 'global_trace' in ksym_trace_filter, it complain with "Invalid argument", but the operation is successful - the "r--" access types is not allowed, but ksym_trace_filter file think it OK for example: # echo ksym_tracer_mutex:r-- > ksym_trace_filter -bash: echo: write error: Resource temporarily unavailable # dmesg ksym_tracer request failed. Try again later!! The error occur at register_kernel_hw_breakpoint(), but It's should at access types parser Signed-off-by: Xiao Guangrong LKML-Reference: <4A66863D.5090802@cn.fujitsu.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index fbf3a8e..cd5cb65 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -135,6 +135,9 @@ static int ksym_trace_get_access_type(char *str) case 6: access = HW_BREAKPOINT_RW; break; + case 4: + access = -EINVAL; + break; case 2: access = HW_BREAKPOINT_WRITE; break; @@ -312,6 +315,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, kfree(entry->ksym_hbp->info.name); kfree(entry->ksym_hbp); kfree(entry); + ret = 0; goto out; } else { /* Check for malformed request: (4) */ -- cgit v0.10.2 From 75e33751ca8bbb72dd6f1a74d2810ddc8cbe4bdf Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 23 Jul 2009 12:01:22 +0800 Subject: tracing/ksym_tracer: support quick clear for ksym_trace_filter -- v2 It's rather boring to clear symbol one by one in ksym_trace_filter file, so, this patch will let ksym_trace_filter file support quickly clear all break points. We can write "0" to this file and it will clear all symbols for example: # cat ksym_trace_filter ksym_filter_head:rw- global_trace:rw- # echo 0 > ksym_trace_filter # cat ksym_trace_filter # Changelog v1->v2: Add other ways to clear all breakpoints by writing NULL or "*:---" to ksym_trace_filter file base on K.Prasad's suggestion Signed-off-by: Xiao Guangrong LKML-Reference: <4A67E092.3080202@cn.fujitsu.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index cd5cb65..2fde875 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -163,8 +163,6 @@ static int parse_ksym_trace_str(char *input_string, char **ksymname, { int ret; - strstrip(input_string); - *ksymname = strsep(&input_string, ":"); *addr = kallsyms_lookup_name(*ksymname); @@ -262,6 +260,25 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, return cnt; } +static void __ksym_trace_reset(void) +{ + struct trace_ksym *entry; + struct hlist_node *node, *node1; + + mutex_lock(&ksym_tracer_mutex); + hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, + ksym_hlist) { + unregister_kernel_hw_breakpoint(entry->ksym_hbp); + ksym_filter_entry_count--; + hlist_del_rcu(&(entry->ksym_hlist)); + synchronize_rcu(); + kfree(entry->ksym_hbp->info.name); + kfree(entry->ksym_hbp); + kfree(entry); + } + mutex_unlock(&ksym_tracer_mutex); +} + static ssize_t ksym_trace_filter_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) @@ -282,6 +299,21 @@ static ssize_t ksym_trace_filter_write(struct file *file, } input_string[count] = '\0'; + strstrip(input_string); + + /* + * Clear all breakpoints if: + * 1: echo > ksym_trace_filter + * 2: echo 0 > ksym_trace_filter + * 3: echo "*:---" > ksym_trace_filter + */ + if (!input_string[0] || !strcmp(input_string, "0") || + !strcmp(input_string, "*:---")) { + __ksym_trace_reset(); + kfree(input_string); + return count; + } + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); if (ret < 0) { kfree(input_string); @@ -341,23 +373,8 @@ static const struct file_operations ksym_tracing_fops = { static void ksym_trace_reset(struct trace_array *tr) { - struct trace_ksym *entry; - struct hlist_node *node, *node1; - ksym_tracing_enabled = 0; - - mutex_lock(&ksym_tracer_mutex); - hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, - ksym_hlist) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); - ksym_filter_entry_count--; - hlist_del_rcu(&(entry->ksym_hlist)); - synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); - kfree(entry); - } - mutex_unlock(&ksym_tracer_mutex); + __ksym_trace_reset(); } static int ksym_trace_init(struct trace_array *tr) -- cgit v0.10.2 From eb13296cfaf6c699566473669a96a38a90562384 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:13 -0400 Subject: x86: Instruction decoder API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add x86 instruction decoder to arch-specific libraries. This decoder can decode x86 instructions used in kernel into prefix, opcode, modrm, sib, displacement and immediates. This can also show the length of instructions. This version introduces instruction attributes for decoding instructions. The instruction attribute tables are generated from the opcode map file (x86-opcode-map.txt) by the generator script(gen-insn-attr-x86.awk). Currently, the opcode maps are based on opcode maps in Intel(R) 64 and IA-32 Architectures Software Developers Manual Vol.2: Appendix.A, and consist of below two types of opcode tables. 1-byte/2-bytes/3-bytes opcodes, which has 256 elements, are written as below; Table: table-name Referrer: escaped-name opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] (or) opcode: escape # escaped-name EndTable Group opcodes, which has 8 elements, are written as below; GrpTable: GrpXXX reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] EndTable These opcode maps include a few SSE and FP opcodes (for setup), because those opcodes are used in the kernel. Signed-off-by: Masami Hiramatsu Signed-off-by: Jim Keniston Acked-by: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203413.31965.49709.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h new file mode 100644 index 0000000..2866fdd --- /dev/null +++ b/arch/x86/include/asm/inat.h @@ -0,0 +1,188 @@ +#ifndef _ASM_X86_INAT_H +#define _ASM_X86_INAT_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include + +/* + * Internal bits. Don't use bitmasks directly, because these bits are + * unstable. You should use checking functions. + */ + +#define INAT_OPCODE_TABLE_SIZE 256 +#define INAT_GROUP_TABLE_SIZE 8 + +/* Legacy instruction prefixes */ +#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ +#define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ +#define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +#define INAT_PFX_LOCK 4 /* 0xF0 */ +#define INAT_PFX_CS 5 /* 0x2E */ +#define INAT_PFX_DS 6 /* 0x3E */ +#define INAT_PFX_ES 7 /* 0x26 */ +#define INAT_PFX_FS 8 /* 0x64 */ +#define INAT_PFX_GS 9 /* 0x65 */ +#define INAT_PFX_SS 10 /* 0x36 */ +#define INAT_PFX_ADDRSZ 11 /* 0x67 */ + +#define INAT_LPREFIX_MAX 3 + +/* Immediate size */ +#define INAT_IMM_BYTE 1 +#define INAT_IMM_WORD 2 +#define INAT_IMM_DWORD 3 +#define INAT_IMM_QWORD 4 +#define INAT_IMM_PTR 5 +#define INAT_IMM_VWORD32 6 +#define INAT_IMM_VWORD 7 + +/* Legacy prefix */ +#define INAT_PFX_OFFS 0 +#define INAT_PFX_BITS 4 +#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) +#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) +/* Escape opcodes */ +#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) +#define INAT_ESC_BITS 2 +#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) +#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) +/* Group opcodes (1-16) */ +#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) +#define INAT_GRP_BITS 5 +#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) +#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) +/* Immediates */ +#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) +#define INAT_IMM_BITS 3 +#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) +/* Flags */ +#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) +#define INAT_REXPFX (1 << INAT_FLAG_OFFS) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 5)) +/* Attribute making macros for attribute tables */ +#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) +#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) +#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) +#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) + +/* Attribute search APIs */ +extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); +extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, + insn_byte_t last_pfx, + insn_attr_t esc_attr); +extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, + insn_byte_t last_pfx, + insn_attr_t esc_attr); + +/* Attribute checking functions */ +static inline int inat_is_prefix(insn_attr_t attr) +{ + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_address_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; +} + +static inline int inat_is_operand_size_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; +} + +static inline int inat_last_prefix_id(insn_attr_t attr) +{ + if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX) + return 0; + else + return attr & INAT_PFX_MASK; +} + +static inline int inat_is_escape(insn_attr_t attr) +{ + return attr & INAT_ESC_MASK; +} + +static inline int inat_escape_id(insn_attr_t attr) +{ + return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; +} + +static inline int inat_is_group(insn_attr_t attr) +{ + return attr & INAT_GRP_MASK; +} + +static inline int inat_group_id(insn_attr_t attr) +{ + return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; +} + +static inline int inat_group_common_attribute(insn_attr_t attr) +{ + return attr & ~INAT_GRP_MASK; +} + +static inline int inat_has_immediate(insn_attr_t attr) +{ + return attr & INAT_IMM_MASK; +} + +static inline int inat_immediate_size(insn_attr_t attr) +{ + return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; +} + +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return attr & INAT_REXPFX; +} + +static inline int inat_has_modrm(insn_attr_t attr) +{ + return attr & INAT_MODRM; +} + +static inline int inat_is_force64(insn_attr_t attr) +{ + return attr & INAT_FORCE64; +} + +static inline int inat_has_second_immediate(insn_attr_t attr) +{ + return attr & INAT_SCNDIMM; +} + +static inline int inat_has_moffset(insn_attr_t attr) +{ + return attr & INAT_MOFFSET; +} + +static inline int inat_has_variant(insn_attr_t attr) +{ + return attr & INAT_VARIANT; +} + +#endif diff --git a/arch/x86/include/asm/inat_types.h b/arch/x86/include/asm/inat_types.h new file mode 100644 index 0000000..cb3c20c --- /dev/null +++ b/arch/x86/include/asm/inat_types.h @@ -0,0 +1,29 @@ +#ifndef _ASM_X86_INAT_TYPES_H +#define _ASM_X86_INAT_TYPES_H +/* + * x86 instruction attributes + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* Instruction attributes */ +typedef unsigned int insn_attr_t; +typedef unsigned char insn_byte_t; +typedef signed int insn_value_t; + +#endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h new file mode 100644 index 0000000..12b4e37 --- /dev/null +++ b/arch/x86/include/asm/insn.h @@ -0,0 +1,143 @@ +#ifndef _ASM_X86_INSN_H +#define _ASM_X86_INSN_H +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +/* insn_attr_t is defined in inat.h */ +#include + +struct insn_field { + union { + insn_value_t value; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +struct insn { + struct insn_field prefixes; /* + * Prefixes + * prefixes.bytes[3]: last prefix + */ + struct insn_field rex_prefix; /* REX prefix */ + struct insn_field opcode; /* + * opcode.bytes[0]: opcode1 + * opcode.bytes[1]: opcode2 + * opcode.bytes[2]: opcode3 + */ + struct insn_field modrm; + struct insn_field sib; + struct insn_field displacement; + union { + struct insn_field immediate; + struct insn_field moffset1; /* for 64bit MOV */ + struct insn_field immediate1; /* for 64bit imm or off16/32 */ + }; + union { + struct insn_field moffset2; /* for 64bit MOV */ + struct insn_field immediate2; /* for 64bit imm or seg16 */ + }; + + insn_attr_t attr; + unsigned char opnd_bytes; + unsigned char addr_bytes; + unsigned char length; + unsigned char x86_64; + + const insn_byte_t *kaddr; /* kernel address of insn to analyze */ + const insn_byte_t *next_byte; +}; + +#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) +#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) +#define X86_MODRM_RM(modrm) ((modrm) & 0x07) + +#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) +#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) +#define X86_SIB_BASE(sib) ((sib) & 0x07) + +#define X86_REX_W(rex) ((rex) & 8) +#define X86_REX_R(rex) ((rex) & 4) +#define X86_REX_X(rex) ((rex) & 2) +#define X86_REX_B(rex) ((rex) & 1) + +/* The last prefix is needed for two-byte and three-byte opcodes */ +static inline insn_byte_t insn_last_prefix(struct insn *insn) +{ + return insn->prefixes.bytes[3]; +} + +extern void insn_init(struct insn *insn, const void *kaddr, int x86_64); +extern void insn_get_prefixes(struct insn *insn); +extern void insn_get_opcode(struct insn *insn); +extern void insn_get_modrm(struct insn *insn); +extern void insn_get_sib(struct insn *insn); +extern void insn_get_displacement(struct insn *insn); +extern void insn_get_immediate(struct insn *insn); +extern void insn_get_length(struct insn *insn); + +/* Attribute will be determined after getting ModRM (for opcode groups) */ +static inline void insn_get_attribute(struct insn *insn) +{ + insn_get_modrm(insn); +} + +/* Instruction uses RIP-relative addressing */ +extern int insn_rip_relative(struct insn *insn); + +/* Init insn for kernel text */ +static inline void kernel_insn_init(struct insn *insn, const void *kaddr) +{ +#ifdef CONFIG_X86_64 + insn_init(insn, kaddr, 1); +#else /* CONFIG_X86_32 */ + insn_init(insn, kaddr, 0); +#endif +} + +/* Offset of each field from kaddr */ +static inline int insn_offset_rex_prefix(struct insn *insn) +{ + return insn->prefixes.nbytes; +} +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; +} +static inline int insn_offset_modrm(struct insn *insn) +{ + return insn_offset_opcode(insn) + insn->opcode.nbytes; +} +static inline int insn_offset_sib(struct insn *insn) +{ + return insn_offset_modrm(insn) + insn->modrm.nbytes; +} +static inline int insn_offset_displacement(struct insn *insn) +{ + return insn_offset_sib(insn) + insn->sib.nbytes; +} +static inline int insn_offset_immediate(struct insn *insn) +{ + return insn_offset_displacement(insn) + insn->displacement.nbytes; +} + +#endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 07c3189..c77f8a7 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -2,12 +2,25 @@ # Makefile for x86 specific library files. # +inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk +inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt +quiet_cmd_inat_tables = GEN $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + +$(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) + $(call cmd,inat_tables) + +$(obj)/inat.o: $(obj)/inat-tables.c + +clean-files := inat-tables.c + obj-$(CONFIG_SMP) := msr.o lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-y += insn.o inat.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c new file mode 100644 index 0000000..054656a --- /dev/null +++ b/arch/x86/lib/inat.c @@ -0,0 +1,78 @@ +/* + * x86 instruction attribute tables + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#include + +/* Attribute tables are generated from opcode map */ +#include "inat-tables.c" + +/* Attribute search APIs */ +insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) +{ + return inat_primary_table[opcode]; +} + +insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx, + insn_attr_t esc_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_escape_id(esc_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_escape_tables[n][0]; + if (!table) + return 0; + if (inat_has_variant(table[opcode]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return 0; + } + return table[opcode]; +} + +insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, + insn_attr_t grp_attr) +{ + const insn_attr_t *table; + insn_attr_t lpfx_attr; + int n, m = 0; + + n = inat_group_id(grp_attr); + if (last_pfx) { + lpfx_attr = inat_get_opcode_attribute(last_pfx); + m = inat_last_prefix_id(lpfx_attr); + } + table = inat_group_tables[n][0]; + if (!table) + return inat_group_common_attribute(grp_attr); + if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { + table = inat_escape_tables[n][m]; + if (!table) + return inat_group_common_attribute(grp_attr); + } + return table[X86_MODRM_REG(modrm)] | + inat_group_common_attribute(grp_attr); +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c new file mode 100644 index 0000000..dfd56a3 --- /dev/null +++ b/arch/x86/lib/insn.c @@ -0,0 +1,464 @@ +/* + * x86 instruction analysis + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004, 2009 + */ + +#include +#include +#include + +#define get_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + +#define peek_next(t, insn) \ + ({t r; r = *(t*)insn->next_byte; r; }) + +/** + * insn_init() - initialize struct insn + * @insn: &struct insn to be initialized + * @kaddr: address (in kernel memory) of instruction (or copy thereof) + * @x86_64: !0 for 64-bit kernel or 64-bit app + */ +void insn_init(struct insn *insn, const void *kaddr, int x86_64) +{ + memset(insn, 0, sizeof(*insn)); + insn->kaddr = kaddr; + insn->next_byte = kaddr; + insn->x86_64 = x86_64 ? 1 : 0; + insn->opnd_bytes = 4; + if (x86_64) + insn->addr_bytes = 8; + else + insn->addr_bytes = 4; +} + +/** + * insn_get_prefixes - scan x86 instruction prefix bytes + * @insn: &struct insn containing instruction + * + * Populates the @insn->prefixes bitmap, and updates @insn->next_byte + * to point to the (first) opcode. No effect if @insn->prefixes.got + * is already set. + */ +void insn_get_prefixes(struct insn *insn) +{ + struct insn_field *prefixes = &insn->prefixes; + insn_attr_t attr; + insn_byte_t b, lb; + int i, nb; + + if (prefixes->got) + return; + + nb = 0; + lb = 0; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + while (inat_is_prefix(attr)) { + /* Skip if same prefix */ + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == b) + goto found; + if (nb == 4) + /* Invalid instruction */ + break; + prefixes->bytes[nb++] = b; + if (inat_is_address_size_prefix(attr)) { + /* address size switches 2/4 or 4/8 */ + if (insn->x86_64) + insn->addr_bytes ^= 12; + else + insn->addr_bytes ^= 6; + } else if (inat_is_operand_size_prefix(attr)) { + /* oprand size switches 2/4 */ + insn->opnd_bytes ^= 6; + } +found: + prefixes->nbytes++; + insn->next_byte++; + lb = b; + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + } + /* Set the last prefix */ + if (lb && lb != insn->prefixes.bytes[3]) { + if (unlikely(insn->prefixes.bytes[3])) { + /* Swap the last prefix */ + b = insn->prefixes.bytes[3]; + for (i = 0; i < nb; i++) + if (prefixes->bytes[i] == lb) + prefixes->bytes[i] = b; + } + insn->prefixes.bytes[3] = lb; + } + + if (insn->x86_64) { + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_rex_prefix(attr)) { + insn->rex_prefix.value = b; + insn->rex_prefix.nbytes = 1; + insn->next_byte++; + if (X86_REX_W(b)) + /* REX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } + } + insn->rex_prefix.got = 1; + prefixes->got = 1; + return; +} + +/** + * insn_get_opcode - collect opcode(s) + * @insn: &struct insn containing instruction + * + * Populates @insn->opcode, updates @insn->next_byte to point past the + * opcode byte(s), and set @insn->attr (except for groups). + * If necessary, first collects any preceding (prefix) bytes. + * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got + * is already 1. + */ +void insn_get_opcode(struct insn *insn) +{ + struct insn_field *opcode = &insn->opcode; + insn_byte_t op, pfx; + if (opcode->got) + return; + if (!insn->prefixes.got) + insn_get_prefixes(insn); + + /* Get first opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[0] = op; + opcode->nbytes = 1; + insn->attr = inat_get_opcode_attribute(op); + while (inat_is_escape(insn->attr)) { + /* Get escaped opcode */ + op = get_next(insn_byte_t, insn); + opcode->bytes[opcode->nbytes++] = op; + pfx = insn_last_prefix(insn); + insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); + } + opcode->got = 1; +} + +/** + * insn_get_modrm - collect ModRM byte, if any + * @insn: &struct insn containing instruction + * + * Populates @insn->modrm and updates @insn->next_byte to point past the + * ModRM byte, if any. If necessary, first collects the preceding bytes + * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. + */ +void insn_get_modrm(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + insn_byte_t pfx, mod; + if (modrm->got) + return; + if (!insn->opcode.got) + insn_get_opcode(insn); + + if (inat_has_modrm(insn->attr)) { + mod = get_next(insn_byte_t, insn); + modrm->value = mod; + modrm->nbytes = 1; + if (inat_is_group(insn->attr)) { + pfx = insn_last_prefix(insn); + insn->attr = inat_get_group_attribute(mod, pfx, + insn->attr); + } + } + + if (insn->x86_64 && inat_is_force64(insn->attr)) + insn->opnd_bytes = 8; + modrm->got = 1; +} + + +/** + * insn_rip_relative() - Does instruction use RIP-relative addressing mode? + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. No effect if @insn->x86_64 is 0. + */ +int insn_rip_relative(struct insn *insn) +{ + struct insn_field *modrm = &insn->modrm; + + if (!insn->x86_64) + return 0; + if (!modrm->got) + insn_get_modrm(insn); + /* + * For rip-relative instructions, the mod field (top 2 bits) + * is zero and the r/m field (bottom 3 bits) is 0x5. + */ + return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); +} + +/** + * insn_get_sib() - Get the SIB byte of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * ModRM byte. + */ +void insn_get_sib(struct insn *insn) +{ + insn_byte_t modrm; + + if (insn->sib.got) + return; + if (!insn->modrm.got) + insn_get_modrm(insn); + if (insn->modrm.nbytes) { + modrm = (insn_byte_t)insn->modrm.value; + if (insn->addr_bytes != 2 && + X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { + insn->sib.value = get_next(insn_byte_t, insn); + insn->sib.nbytes = 1; + } + } + insn->sib.got = 1; +} + + +/** + * insn_get_displacement() - Get the displacement of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * SIB byte. + * Displacement value is sign-expanded. + */ +void insn_get_displacement(struct insn *insn) +{ + insn_byte_t mod, rm, base; + + if (insn->displacement.got) + return; + if (!insn->sib.got) + insn_get_sib(insn); + if (insn->modrm.nbytes) { + /* + * Interpreting the modrm byte: + * mod = 00 - no displacement fields (exceptions below) + * mod = 01 - 1-byte displacement field + * mod = 10 - displacement field is 4 bytes, or 2 bytes if + * address size = 2 (0x67 prefix in 32-bit mode) + * mod = 11 - no memory operand + * + * If address size = 2... + * mod = 00, r/m = 110 - displacement field is 2 bytes + * + * If address size != 2... + * mod != 11, r/m = 100 - SIB byte exists + * mod = 00, SIB base = 101 - displacement field is 4 bytes + * mod = 00, r/m = 101 - rip-relative addressing, displacement + * field is 4 bytes + */ + mod = X86_MODRM_MOD(insn->modrm.value); + rm = X86_MODRM_RM(insn->modrm.value); + base = X86_SIB_BASE(insn->sib.value); + if (mod == 3) + goto out; + if (mod == 1) { + insn->displacement.value = get_next(char, insn); + insn->displacement.nbytes = 1; + } else if (insn->addr_bytes == 2) { + if ((mod == 0 && rm == 6) || mod == 2) { + insn->displacement.value = + get_next(short, insn); + insn->displacement.nbytes = 2; + } + } else { + if ((mod == 0 && rm == 5) || mod == 2 || + (mod == 0 && base == 5)) { + insn->displacement.value = get_next(int, insn); + insn->displacement.nbytes = 4; + } + } + } +out: + insn->displacement.got = 1; +} + +/* Decode moffset16/32/64 */ +static void __get_moffset(struct insn *insn) +{ + switch (insn->addr_bytes) { + case 2: + insn->moffset1.value = get_next(short, insn); + insn->moffset1.nbytes = 2; + break; + case 4: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + break; + case 8: + insn->moffset1.value = get_next(int, insn); + insn->moffset1.nbytes = 4; + insn->moffset2.value = get_next(int, insn); + insn->moffset2.nbytes = 4; + break; + } + insn->moffset1.got = insn->moffset2.got = 1; +} + +/* Decode imm v32(Iz) */ +static void __get_immv32(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case 4: + case 8: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + } +} + +/* Decode imm v64(Iv/Ov) */ +static void __get_immv(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + } + insn->immediate1.got = insn->immediate2.got = 1; +} + +/* Decode ptr16:16/32(Ap) */ +static void __get_immptr(struct insn *insn) +{ + switch (insn->opnd_bytes) { + case 2: + insn->immediate1.value = get_next(short, insn); + insn->immediate1.nbytes = 2; + break; + case 4: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + break; + case 8: + /* ptr16:64 is not exist (no segment) */ + return; + } + insn->immediate2.value = get_next(unsigned short, insn); + insn->immediate2.nbytes = 2; + insn->immediate1.got = insn->immediate2.got = 1; +} + +/** + * insn_get_immediate() - Get the immediates of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * displacement bytes. + * Basically, most of immediates are sign-expanded. Unsigned-value can be + * get by bit masking with ((1 << (nbytes * 8)) - 1) + */ +void insn_get_immediate(struct insn *insn) +{ + if (insn->immediate.got) + return; + if (!insn->displacement.got) + insn_get_displacement(insn); + + if (inat_has_moffset(insn->attr)) { + __get_moffset(insn); + goto done; + } + + if (!inat_has_immediate(insn->attr)) + /* no immediates */ + goto done; + + switch (inat_immediate_size(insn->attr)) { + case INAT_IMM_BYTE: + insn->immediate.value = get_next(char, insn); + insn->immediate.nbytes = 1; + break; + case INAT_IMM_WORD: + insn->immediate.value = get_next(short, insn); + insn->immediate.nbytes = 2; + break; + case INAT_IMM_DWORD: + insn->immediate.value = get_next(int, insn); + insn->immediate.nbytes = 4; + break; + case INAT_IMM_QWORD: + insn->immediate1.value = get_next(int, insn); + insn->immediate1.nbytes = 4; + insn->immediate2.value = get_next(int, insn); + insn->immediate2.nbytes = 4; + break; + case INAT_IMM_PTR: + __get_immptr(insn); + break; + case INAT_IMM_VWORD32: + __get_immv32(insn); + break; + case INAT_IMM_VWORD: + __get_immv(insn); + break; + default: + break; + } + if (inat_has_second_immediate(insn->attr)) { + insn->immediate2.value = get_next(char, insn); + insn->immediate2.nbytes = 1; + } +done: + insn->immediate.got = 1; +} + +/** + * insn_get_length() - Get the length of instruction + * @insn: &struct insn containing instruction + * + * If necessary, first collects the instruction up to and including the + * immediates bytes. + */ +void insn_get_length(struct insn *insn) +{ + if (insn->length) + return; + if (!insn->immediate.got) + insn_get_immediate(insn); + insn->length = (unsigned char)((unsigned long)insn->next_byte + - (unsigned long)insn->kaddr); +} diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt new file mode 100644 index 0000000..083dd59 --- /dev/null +++ b/arch/x86/lib/x86-opcode-map.txt @@ -0,0 +1,719 @@ +# x86 Opcode Maps +# +# +# Table: table-name +# Referrer: escaped-name +# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# (or) +# opcode: escape # escaped-name +# EndTable +# +# +# GrpTable: GrpXXX +# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] +# EndTable +# + +Table: one byte opcode +Referrer: +# 0x00 - 0x0f +00: ADD Eb,Gb +01: ADD Ev,Gv +02: ADD Gb,Eb +03: ADD Gv,Ev +04: ADD AL,Ib +05: ADD rAX,Iz +06: PUSH ES (i64) +07: POP ES (i64) +08: OR Eb,Gb +09: OR Ev,Gv +0a: OR Gb,Eb +0b: OR Gv,Ev +0c: OR AL,Ib +0d: OR rAX,Iz +0e: PUSH CS (i64) +0f: escape # 2-byte escape +# 0x10 - 0x1f +10: ADC Eb,Gb +11: ADC Ev,Gv +12: ADC Gb,Eb +13: ADC Gv,Ev +14: ADC AL,Ib +15: ADC rAX,Iz +16: PUSH SS (i64) +17: POP SS (i64) +18: SBB Eb,Gb +19: SBB Ev,Gv +1a: SBB Gb,Eb +1b: SBB Gv,Ev +1c: SBB AL,Ib +1d: SBB rAX,Iz +1e: PUSH DS (i64) +1f: POP DS (i64) +# 0x20 - 0x2f +20: AND Eb,Gb +21: AND Ev,Gv +22: AND Gb,Eb +23: AND Gv,Ev +24: AND AL,Ib +25: AND rAx,Iz +26: SEG=ES (Prefix) +27: DAA (i64) +28: SUB Eb,Gb +29: SUB Ev,Gv +2a: SUB Gb,Eb +2b: SUB Gv,Ev +2c: SUB AL,Ib +2d: SUB rAX,Iz +2e: SEG=CS (Prefix) +2f: DAS (i64) +# 0x30 - 0x3f +30: XOR Eb,Gb +31: XOR Ev,Gv +32: XOR Gb,Eb +33: XOR Gv,Ev +34: XOR AL,Ib +35: XOR rAX,Iz +36: SEG=SS (Prefix) +37: AAA (i64) +38: CMP Eb,Gb +39: CMP Ev,Gv +3a: CMP Gb,Eb +3b: CMP Gv,Ev +3c: CMP AL,Ib +3d: CMP rAX,Iz +3e: SEG=DS (Prefix) +3f: AAS (i64) +# 0x40 - 0x4f +40: INC eAX (i64) | REX (o64) +41: INC eCX (i64) | REX.B (o64) +42: INC eDX (i64) | REX.X (o64) +43: INC eBX (i64) | REX.XB (o64) +44: INC eSP (i64) | REX.R (o64) +45: INC eBP (i64) | REX.RB (o64) +46: INC eSI (i64) | REX.RX (o64) +47: INC eDI (i64) | REX.RXB (o64) +48: DEC eAX (i64) | REX.W (o64) +49: DEC eCX (i64) | REX.WB (o64) +4a: DEC eDX (i64) | REX.WX (o64) +4b: DEC eBX (i64) | REX.WXB (o64) +4c: DEC eSP (i64) | REX.WR (o64) +4d: DEC eBP (i64) | REX.WRB (o64) +4e: DEC eSI (i64) | REX.WRX (o64) +4f: DEC eDI (i64) | REX.WRXB (o64) +# 0x50 - 0x5f +50: PUSH rAX/r8 (d64) +51: PUSH rCX/r9 (d64) +52: PUSH rDX/r10 (d64) +53: PUSH rBX/r11 (d64) +54: PUSH rSP/r12 (d64) +55: PUSH rBP/r13 (d64) +56: PUSH rSI/r14 (d64) +57: PUSH rDI/r15 (d64) +58: POP rAX/r8 (d64) +59: POP rCX/r9 (d64) +5a: POP rDX/r10 (d64) +5b: POP rBX/r11 (d64) +5c: POP rSP/r12 (d64) +5d: POP rBP/r13 (d64) +5e: POP rSI/r14 (d64) +5f: POP rDI/r15 (d64) +# 0x60 - 0x6f +60: PUSHA/PUSHAD (i64) +61: POPA/POPAD (i64) +62: BOUND Gv,Ma (i64) +63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) +64: SEG=FS (Prefix) +65: SEG=GS (Prefix) +66: Operand-Size (Prefix) +67: Address-Size (Prefix) +68: PUSH Iz (d64) +69: IMUL Gv,Ev,Iz +6a: PUSH Ib (d64) +6b: IMUL Gv,Ev,Ib +6c: INS/INSB Yb,DX +6d: INS/INSW/INSD Yz,DX +6e: OUTS/OUTSB DX,Xb +6f: OUTS/OUTSW/OUTSD DX,Xz +# 0x70 - 0x7f +70: JO Jb +71: JNO Jb +72: JB/JNAE/JC Jb +73: JNB/JAE/JNC Jb +74: JZ/JE Jb +75: JNZ/JNE Jb +76: JBE/JNA Jb +77: JNBE/JA Jb +78: JS Jb +79: JNS Jb +7a: JP/JPE Jb +7b: JNP/JPO Jb +7c: JL/JNGE Jb +7d: JNL/JGE Jb +7e: JLE/JNG Jb +7f: JNLE/JG Jb +# 0x80 - 0x8f +80: Grp1 Eb,Ib (1A) +81: Grp1 Ev,Iz (1A) +82: Grp1 Eb,Ib (1A),(i64) +83: Grp1 Ev,Ib (1A) +84: TEST Eb,Gb +85: TEST Ev,Gv +86: XCHG Eb,Gb +87: XCHG Ev,Gv +88: MOV Eb,Gb +89: MOV Ev,Gv +8a: MOV Gb,Eb +8b: MOV Gv,Ev +8c: MOV Ev,Sw +8d: LEA Gv,M +8e: MOV Sw,Ew +8f: Grp1A (1A) | POP Ev (d64) +# 0x90 - 0x9f +90: NOP | PAUSE (F3) | XCHG r8,rAX +91: XCHG rCX/r9,rAX +92: XCHG rDX/r10,rAX +93: XCHG rBX/r11,rAX +94: XCHG rSP/r12,rAX +95: XCHG rBP/r13,rAX +96: XCHG rSI/r14,rAX +97: XCHG rDI/r15,rAX +98: CBW/CWDE/CDQE +99: CWD/CDQ/CQO +9a: CALLF Ap (i64) +9b: FWAIT/WAIT +9c: PUSHF/D/Q Fv (d64) +9d: POPF/D/Q Fv (d64) +9e: SAHF +9f: LAHF +# 0xa0 - 0xaf +a0: MOV AL,Ob +a1: MOV rAX,Ov +a2: MOV Ob,AL +a3: MOV Ov,rAX +a4: MOVS/B Xb,Yb +a5: MOVS/W/D/Q Xv,Yv +a6: CMPS/B Xb,Yb +a7: CMPS/W/D Xv,Yv +a8: TEST AL,Ib +a9: TEST rAX,Iz +aa: STOS/B Yb,AL +ab: STOS/W/D/Q Yv,rAX +ac: LODS/B AL,Xb +ad: LODS/W/D/Q rAX,Xv +ae: SCAS/B AL,Yb +af: SCAS/W/D/Q rAX,Xv +# 0xb0 - 0xbf +b0: MOV AL/R8L,Ib +b1: MOV CL/R9L,Ib +b2: MOV DL/R10L,Ib +b3: MOV BL/R11L,Ib +b4: MOV AH/R12L,Ib +b5: MOV CH/R13L,Ib +b6: MOV DH/R14L,Ib +b7: MOV BH/R15L,Ib +b8: MOV rAX/r8,Iv +b9: MOV rCX/r9,Iv +ba: MOV rDX/r10,Iv +bb: MOV rBX/r11,Iv +bc: MOV rSP/r12,Iv +bd: MOV rBP/r13,Iv +be: MOV rSI/r14,Iv +bf: MOV rDI/r15,Iv +# 0xc0 - 0xcf +c0: Grp2 Eb,Ib (1A) +c1: Grp2 Ev,Ib (1A) +c2: RETN Iw (f64) +c3: RETN +c4: LES Gz,Mp (i64) +c5: LDS Gz,Mp (i64) +c6: Grp11 Eb,Ib (1A) +c7: Grp11 Ev,Iz (1A) +c8: ENTER Iw,Ib +c9: LEAVE (d64) +ca: RETF Iw +cb: RETF +cc: INT3 +cd: INT Ib +ce: INTO (i64) +cf: IRET/D/Q +# 0xd0 - 0xdf +d0: Grp2 Eb,1 (1A) +d1: Grp2 Ev,1 (1A) +d2: Grp2 Eb,CL (1A) +d3: Grp2 Ev,CL (1A) +d4: AAM Ib (i64) +d5: AAD Ib (i64) +d6: +d7: XLAT/XLATB +d8: ESC +d9: ESC +da: ESC +db: ESC +dc: ESC +dd: ESC +de: ESC +df: ESC +# 0xe0 - 0xef +e0: LOOPNE/LOOPNZ Jb (f64) +e1: LOOPE/LOOPZ Jb (f64) +e2: LOOP Jb (f64) +e3: JrCXZ Jb (f64) +e4: IN AL,Ib +e5: IN eAX,Ib +e6: OUT Ib,AL +e7: OUT Ib,eAX +e8: CALL Jz (f64) +e9: JMP-near Jz (f64) +ea: JMP-far Ap (i64) +eb: JMP-short Jb (f64) +ec: IN AL,DX +ed: IN eAX,DX +ee: OUT DX,AL +ef: OUT DX,eAX +# 0xf0 - 0xff +f0: LOCK (Prefix) +f1: +f2: REPNE (Prefix) +f3: REP/REPE (Prefix) +f4: HLT +f5: CMC +f6: Grp3_1 Eb (1A) +f7: Grp3_2 Ev (1A) +f8: CLC +f9: STC +fa: CLI +fb: STI +fc: CLD +fd: STD +fe: Grp4 (1A) +ff: Grp5 (1A) +EndTable + +Table: 2-byte opcode # First Byte is 0x0f +Referrer: 2-byte escape +# 0x0f 0x00-0x0f +00: Grp6 (1A) +01: Grp7 (1A) +02: LAR Gv,Ew +03: LSL Gv,Ew +04: +05: SYSCALL (o64) +06: CLTS +07: SYSRET (o64) +08: INVD +09: WBINVD +0a: +0b: UD2 (1B) +0c: +0d: NOP Ev +0e: +0f: +# 0x0f 0x10-0x1f +10: +11: +12: +13: +14: +15: +16: +17: +18: Grp16 (1A) +19: +1a: +1b: +1c: +1d: +1e: +1f: NOP Ev +# 0x0f 0x20-0x2f +20: MOV Rd,Cd +21: MOV Rd,Dd +22: MOV Cd,Rd +23: MOV Dd,Rd +24: +25: +26: +27: +28: movaps Vps,Wps | movapd Vpd,Wpd (66) +29: movaps Wps,Vps | movapd Wpd,Vpd (66) +2a: +2b: +2c: +2d: +2e: +2f: +# 0x0f 0x30-0x3f +30: WRMSR +31: RDTSC +32: RDMSR +33: RDPMC +34: SYSENTER +35: SYSEXIT +36: +37: GETSEC +38: escape # 3-byte escape 1 +39: +3a: escape # 3-byte escape 2 +3b: +3c: +3d: +3e: +3f: +# 0x0f 0x40-0x4f +40: CMOVO Gv,Ev +41: CMOVNO Gv,Ev +42: CMOVB/C/NAE Gv,Ev +43: CMOVAE/NB/NC Gv,Ev +44: CMOVE/Z Gv,Ev +45: CMOVNE/NZ Gv,Ev +46: CMOVBE/NA Gv,Ev +47: CMOVA/NBE Gv,Ev +48: CMOVS Gv,Ev +49: CMOVNS Gv,Ev +4a: CMOVP/PE Gv,Ev +4b: CMOVNP/PO Gv,Ev +4c: CMOVL/NGE Gv,Ev +4d: CMOVNL/GE Gv,Ev +4e: CMOVLE/NG Gv,Ev +4f: CMOVNLE/G Gv,Ev +# 0x0f 0x50-0x5f +50: +51: +52: +53: +54: +55: +56: +57: +58: +59: +5a: +5b: +5c: +5d: +5e: +5f: +# 0x0f 0x60-0x6f +60: +61: +62: +63: +64: +65: +66: +67: +68: +69: +6a: +6b: +6c: +6d: +6e: +6f: +# 0x0f 0x70-0x7f +70: +71: Grp12 (1A) +72: Grp13 (1A) +73: Grp14 (1A) +74: +75: +76: +77: +78: VMREAD Ed/q,Gd/q +79: VMWRITE Gd/q,Ed/q +7a: +7b: +7c: +7d: +7e: +7f: +# 0x0f 0x80-0x8f +80: JO Jz (f64) +81: JNO Jz (f64) +82: JB/JNAE/JC Jz (f64) +83: JNB/JAE/JNC Jz (f64) +84: JZ/JE Jz (f64) +85: JNZ/JNE Jz (f64) +86: JBE/JNA Jz (f64) +87: JNBE/JA Jz (f64) +88: JS Jz (f64) +89: JNS Jz (f64) +8a: JP/JPE Jz (f64) +8b: JNP/JPO Jz (f64) +8c: JL/JNGE Jz (f64) +8d: JNL/JGE Jz (f64) +8e: JLE/JNG Jz (f64) +8f: JNLE/JG Jz (f64) +# 0x0f 0x90-0x9f +90: SETO Eb +91: SETNO Eb +92: SETB/C/NAE Eb +93: SETAE/NB/NC Eb +94: SETE/Z Eb +95: SETNE/NZ Eb +96: SETBE/NA Eb +97: SETA/NBE Eb +98: SETS Eb +99: SETNS Eb +9a: SETP/PE Eb +9b: SETNP/PO Eb +9c: SETL/NGE Eb +9d: SETNL/GE Eb +9e: SETLE/NG Eb +9f: SETNLE/G Eb +# 0x0f 0xa0-0xaf +a0: PUSH FS (d64) +a1: POP FS (d64) +a2: CPUID +a3: BT Ev,Gv +a4: SHLD Ev,Gv,Ib +a5: SHLD Ev,Gv,CL +a6: +a7: GrpRNG +a8: PUSH GS (d64) +a9: POP GS (d64) +aa: RSM +ab: BTS Ev,Gv +ac: SHRD Ev,Gv,Ib +ad: SHRD Ev,Gv,CL +ae: Grp15 (1A),(1C) +af: IMUL Gv,Ev +# 0x0f 0xb0-0xbf +b0: CMPXCHG Eb,Gb +b1: CMPXCHG Ev,Gv +b2: LSS Gv,Mp +b3: BTR Ev,Gv +b4: LFS Gv,Mp +b5: LGS Gv,Mp +b6: MOVZX Gv,Eb +b7: MOVZX Gv,Ew +b8: JMPE | POPCNT Gv,Ev (F3) +b9: Grp10 (1A) +ba: Grp8 Ev,Ib (1A) +bb: BTC Ev,Gv +bc: BSF Gv,Ev +bd: BSR Gv,Ev +be: MOVSX Gv,Eb +bf: MOVSX Gv,Ew +# 0x0f 0xc0-0xcf +c0: XADD Eb,Gb +c1: XADD Ev,Gv +c2: +c3: movnti Md/q,Gd/q +c4: +c5: +c6: +c7: Grp9 (1A) +c8: BSWAP RAX/EAX/R8/R8D +c9: BSWAP RCX/ECX/R9/R9D +ca: BSWAP RDX/EDX/R10/R10D +cb: BSWAP RBX/EBX/R11/R11D +cc: BSWAP RSP/ESP/R12/R12D +cd: BSWAP RBP/EBP/R13/R13D +ce: BSWAP RSI/ESI/R14/R14D +cf: BSWAP RDI/EDI/R15/R15D +# 0x0f 0xd0-0xdf +d0: +d1: +d2: +d3: +d4: +d5: +d6: +d7: +d8: +d9: +da: +db: +dc: +dd: +de: +df: +# 0x0f 0xe0-0xef +e0: +e1: +e2: +e3: +e4: +e5: +e6: +e7: +e8: +e9: +ea: +eb: +ec: +ed: +ee: +ef: +# 0x0f 0xf0-0xff +f0: +f1: +f2: +f3: +f4: +f5: +f6: +f7: +f8: +f9: +fa: +fb: +fc: +fd: +fe: +ff: +EndTable + +Table: 3-byte opcode 1 +Referrer: 3-byte escape 1 +80: INVEPT Gd/q,Mdq (66) +81: INVPID Gd/q,Mdq (66) +f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) +f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) +EndTable + +Table: 3-byte opcode 2 +Referrer: 3-byte escape 2 +# all opcode is for SSE +EndTable + +GrpTable: Grp1 +0: ADD +1: OR +2: ADC +3: SBB +4: AND +5: SUB +6: XOR +7: CMP +EndTable + +GrpTable: Grp1A +0: POP +EndTable + +GrpTable: Grp2 +0: ROL +1: ROR +2: RCL +3: RCR +4: SHL/SAL +5: SHR +6: +7: SAR +EndTable + +GrpTable: Grp3_1 +0: TEST Eb,Ib +1: +2: NOT Eb +3: NEG Eb +4: MUL AL,Eb +5: IMUL AL,Eb +6: DIV AL,Eb +7: IDIV AL,Eb +EndTable + +GrpTable: Grp3_2 +0: TEST Ev,Iz +1: +2: NOT Ev +3: NEG Ev +4: MUL rAX,Ev +5: IMUL rAX,Ev +6: DIV rAX,Ev +7: IDIV rAX,Ev +EndTable + +GrpTable: Grp4 +0: INC Eb +1: DEC Eb +EndTable + +GrpTable: Grp5 +0: INC Ev +1: DEC Ev +2: CALLN Ev (f64) +3: CALLF Ep +4: JMPN Ev (f64) +5: JMPF Ep +6: PUSH Ev (d64) +7: +EndTable + +GrpTable: Grp6 +0: SLDT Rv/Mw +1: STR Rv/Mw +2: LLDT Ew +3: LTR Ew +4: VERR Ew +5: VERW Ew +EndTable + +GrpTable: Grp7 +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) +3: LIDT Ms +4: SMSW Mw/Rv +5: +6: LMSW Ew +7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) +EndTable + +GrpTable: Grp8 +4: BT +5: BTS +6: BTR +7: BTC +EndTable + +GrpTable: Grp9 +1: CMPXCHG8B/16B Mq/Mdq +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) +7: VMPTRST Mq +EndTable + +GrpTable: Grp10 +EndTable + +GrpTable: Grp11 +0: MOV +EndTable + +GrpTable: Grp12 +EndTable + +GrpTable: Grp13 +EndTable + +GrpTable: Grp14 +EndTable + +GrpTable: Grp15 +0: fxsave +1: fxstor +2: ldmxcsr +3: stmxcsr +4: XSAVE +5: XRSTOR | lfence (11B) +6: mfence (11B) +7: clflush | sfence (11B) +EndTable + +GrpTable: Grp16 +0: prefetch NTA +1: prefetch T0 +2: prefetch T1 +3: prefetch T2 +EndTable + +GrpTable: GrpRNG +0: xstore-rng +1: xcrypt-ecb +2: xcrypt-cbc +4: xcrypt-cfb +5: xcrypt-ofb +EndTable diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk new file mode 100644 index 0000000..93b62c9 --- /dev/null +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -0,0 +1,314 @@ +#!/bin/awk -f +# gen-insn-attr-x86.awk: Instruction attribute table generator +# Written by Masami Hiramatsu +# +# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c + +BEGIN { + print "/* x86 opcode map generated from x86-opcode-map.txt */" + print "/* Do not change this code. */" + ggid = 1 + geid = 1 + + opnd_expr = "^[[:alpha:]]" + ext_expr = "^\\(" + sep_expr = "^\\|$" + group_expr = "^Grp[[:alnum:]]+" + + imm_expr = "^[IJAO][[:lower:]]" + imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" + imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" + imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" + imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" + imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" + imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" + imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" + imm_flag["Ob"] = "INAT_MOFFSET" + imm_flag["Ov"] = "INAT_MOFFSET" + + modrm_expr = "^([CDEGMNPQRSUVW][[:lower:]]+|NTA|T[012])" + force64_expr = "\\([df]64\\)" + rex_expr = "^REX(\\.[XRWB]+)*" + fpu_expr = "^ESC" # TODO + + lprefix1_expr = "\\(66\\)" + delete lptable1 + lprefix2_expr = "\\(F2\\)" + delete lptable2 + lprefix3_expr = "\\(F3\\)" + delete lptable3 + max_lprefix = 4 + + prefix_expr = "\\(Prefix\\)" + prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" + prefix_num["REPNE"] = "INAT_PFX_REPNE" + prefix_num["REP/REPE"] = "INAT_PFX_REPE" + prefix_num["LOCK"] = "INAT_PFX_LOCK" + prefix_num["SEG=CS"] = "INAT_PFX_CS" + prefix_num["SEG=DS"] = "INAT_PFX_DS" + prefix_num["SEG=ES"] = "INAT_PFX_ES" + prefix_num["SEG=FS"] = "INAT_PFX_FS" + prefix_num["SEG=GS"] = "INAT_PFX_GS" + prefix_num["SEG=SS"] = "INAT_PFX_SS" + prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + + delete table + delete etable + delete gtable + eid = -1 + gid = -1 +} + +function semantic_error(msg) { + print "Semantic error at " NR ": " msg > "/dev/stderr" + exit 1 +} + +function debug(msg) { + print "DEBUG: " msg +} + +function array_size(arr, i,c) { + c = 0 + for (i in arr) + c++ + return c +} + +/^Table:/ { + print "/* " $0 " */" +} + +/^Referrer:/ { + if (NF == 1) { + # primary opcode table + tname = "inat_primary_table" + eid = -1 + } else { + # escape opcode table + ref = "" + for (i = 2; i <= NF; i++) + ref = ref $i + eid = escape[ref] + tname = sprintf("inat_escape_table_%d", eid) + } +} + +/^GrpTable:/ { + print "/* " $0 " */" + if (!($2 in group)) + semantic_error("No group: " $2 ) + gid = group[$2] + tname = "inat_group_table_" gid +} + +function print_table(tbl,name,fmt,n) +{ + print "const insn_attr_t " name " = {" + for (i = 0; i < n; i++) { + id = sprintf(fmt, i) + if (tbl[id]) + print " [" id "] = " tbl[id] "," + } + print "};" +} + +/^EndTable/ { + if (gid != -1) { + # print group tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", + "0x%x", 8) + gtable[gid,3] = tname "_3" + } + } else { + # print primary/escaped tables + if (array_size(table) != 0) { + print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,0] = tname + } + if (array_size(lptable1) != 0) { + print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,1] = tname "_1" + } + if (array_size(lptable2) != 0) { + print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,2] = tname "_2" + } + if (array_size(lptable3) != 0) { + print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", + "0x%02x", 256) + etable[eid,3] = tname "_3" + } + } + print "" + delete table + delete lptable1 + delete lptable2 + delete lptable3 + gid = -1 + eid = -1 +} + +function add_flags(old,new) { + if (old && new) + return old " | " new + else if (old) + return old + else + return new +} + +# convert operands to flags. +function convert_operands(opnd, i,imm,mod) +{ + imm = null + mod = null + for (i in opnd) { + i = opnd[i] + if (match(i, imm_expr) == 1) { + if (!imm_flag[i]) + semantic_error("Unknown imm opnd: " i) + if (imm) { + if (i != "Ib") + semantic_error("Second IMM error") + imm = add_flags(imm, "INAT_SCNDIMM") + } else + imm = imm_flag[i] + } else if (match(i, modrm_expr)) + mod = "INAT_MODRM" + } + return add_flags(imm, mod) +} + +/^[0-9a-f]+\:/ { + if (NR == 1) + next + # get index + idx = "0x" substr($1, 1, index($1,":") - 1) + if (idx in table) + semantic_error("Redefine " idx " in " tname) + + # check if escaped opcode + if ("escape" == $2) { + if ($3 != "#") + semantic_error("No escaped name") + ref = "" + for (i = 4; i <= NF; i++) + ref = ref $i + if (ref in escape) + semantic_error("Redefine escape (" ref ")") + escape[ref] = geid + geid++ + table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" + next + } + + variant = null + # converts + i = 2 + while (i <= NF) { + opcode = $(i++) + delete opnds + ext = null + flags = null + opnd = null + # parse one opcode + if (match($i, opnd_expr)) { + opnd = $i + split($(i++), opnds, ",") + flags = convert_operands(opnds) + } + if (match($i, ext_expr)) + ext = $(i++) + if (match($i, sep_expr)) + i++ + else if (i < NF) + semantic_error($i " is not a separator") + + # check if group opcode + if (match(opcode, group_expr)) { + if (!(opcode in group)) { + group[opcode] = ggid + ggid++ + } + flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") + } + # check force(or default) 64bit + if (match(ext, force64_expr)) + flags = add_flags(flags, "INAT_FORCE64") + + # check REX prefix + if (match(opcode, rex_expr)) + flags = add_flags(flags, "INAT_REXPFX") + + # check coprocessor escape : TODO + if (match(opcode, fpu_expr)) + flags = add_flags(flags, "INAT_MODRM") + + # check prefixes + if (match(ext, prefix_expr)) { + if (!prefix_num[opcode]) + semantic_error("Unknown prefix: " opcode) + flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") + } + if (length(flags) == 0) + continue + # check if last prefix + if (match(ext, lprefix1_expr)) { + lptable1[idx] = add_flags(lptable1[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix2_expr)) { + lptable2[idx] = add_flags(lptable2[idx],flags) + variant = "INAT_VARIANT" + } else if (match(ext, lprefix3_expr)) { + lptable3[idx] = add_flags(lptable3[idx],flags) + variant = "INAT_VARIANT" + } else { + table[idx] = add_flags(table[idx],flags) + } + } + if (variant) + table[idx] = add_flags(table[idx],variant) +} + +END { + # print escape opcode map's array + print "/* Escape opcode map array */" + print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ + "[INAT_LPREFIX_MAX + 1] = {" + for (i = 0; i < geid; i++) + for (j = 0; j < max_lprefix; j++) + if (etable[i,j]) + print " ["i"]["j"] = "etable[i,j]"," + print "};\n" + # print group opcode map's array + print "/* Group opcode map array */" + print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ + "[INAT_LPREFIX_MAX + 1] = {" + for (i = 0; i < ggid; i++) + for (j = 0; j < max_lprefix; j++) + if (gtable[i,j]) + print " ["i"]["j"] = "gtable[i,j]"," + print "};" +} -- cgit v0.10.2 From ca0e9badd1a39fecdd235f4bf1481b9da756e27b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:21 -0400 Subject: x86: X86 instruction decoder build-time selftest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a user-space selftest of x86 instruction decoder at kernel build time. When CONFIG_X86_DECODER_SELFTEST=y, Kbuild builds a test harness of x86 instruction decoder and performs it after building vmlinux. The test compares the results of objdump and x86 instruction decoder code and check there are no differences. Signed-off-by: Masami Hiramatsu Signed-off-by: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203421.31965.29006.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29..7d0b681 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,6 +186,15 @@ config X86_DS_SELFTEST config HAVE_MMIOTRACE_SUPPORT def_bool y +config X86_DECODER_SELFTEST + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL + ---help--- + Perform x86 instruction decoder selftests at build time. + This option is useful for checking the sanity of x86 instruction + decoder code. + If unsure, say "N". + # # IO delay types: # diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1b68659..5fe16bf 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -154,6 +154,9 @@ all: bzImage KBUILD_IMAGE := $(boot)/bzImage bzImage: vmlinux +ifeq ($(CONFIG_X86_DECODER_SELFTEST),y) + $(Q)$(MAKE) $(build)=arch/x86/tools posttest +endif $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile new file mode 100644 index 0000000..3dd626b --- /dev/null +++ b/arch/x86/tools/Makefile @@ -0,0 +1,15 @@ +PHONY += posttest +quiet_cmd_posttest = TEST $@ + cmd_posttest = $(OBJDUMP) -d $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len + +posttest: $(obj)/test_get_len vmlinux + $(call cmd,posttest) + +hostprogs-y := test_get_len + +# -I needed for generated C source and C source which in the kernel tree. +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ + +# Dependancies are also needed. +$(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c + diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk new file mode 100644 index 0000000..d433619 --- /dev/null +++ b/arch/x86/tools/distill.awk @@ -0,0 +1,42 @@ +#!/bin/awk -f +# Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len +# Distills the disassembly as follows: +# - Removes all lines except the disassembled instructions. +# - For instructions that exceed 1 line (7 bytes), crams all the hex bytes +# into a single line. +# - Remove bad(or prefix only) instructions + +BEGIN { + prev_addr = "" + prev_hex = "" + prev_mnemonic = "" + bad_expr = "(\\(bad\\)|^rex|^.byte|^rep(z|nz)$|^lock$|^es$|^cs$|^ss$|^ds$|^fs$|^gs$|^data(16|32)$|^addr(16|32|64))" + fwait_expr = "^9b " + fwait_str="9b\tfwait" +} + +/^ *[0-9a-f]+:/ { + if (split($0, field, "\t") < 3) { + # This is a continuation of the same insn. + prev_hex = prev_hex field[2] + } else { + # Skip bad instructions + if (match(prev_mnemonic, bad_expr)) + prev_addr = "" + # Split fwait from other f* instructions + if (match(prev_hex, fwait_expr) && prev_mnemonic != "fwait") { + printf "%s\t%s\n", prev_addr, fwait_str + sub(fwait_expr, "", prev_hex) + } + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic + prev_addr = field[1] + prev_hex = field[2] + prev_mnemonic = field[3] + } +} + +END { + if (prev_addr != "") + printf "%s\t%s\t%s\n", prev_addr, prev_hex, prev_mnemonic +} diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c new file mode 100644 index 0000000..1e81adb --- /dev/null +++ b/arch/x86/tools/test_get_len.c @@ -0,0 +1,113 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2009 + */ + +#include +#include +#include +#include + +#ifdef __x86_64__ +#define CONFIG_X86_64 +#else +#define CONFIG_X86_32 +#endif +#define unlikely(cond) (cond) + +#include +#include +#include + +/* + * Test of instruction analysis in general and insn_get_length() in + * particular. See if insn_get_length() and the disassembler agree + * on the length of each instruction in an elf disassembly. + * + * Usage: objdump -d a.out | awk -f distill.awk | ./test_get_len + */ + +const char *prog; + +static void usage(void) +{ + fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" + " ./test_get_len\n"); + exit(1); +} + +static void malformed_line(const char *line, int line_nr) +{ + fprintf(stderr, "%s: malformed line %d:\n%s", prog, line_nr, line); + exit(3); +} + +#define BUFSIZE 256 + +int main(int argc, char **argv) +{ + char line[BUFSIZE]; + unsigned char insn_buf[16]; + struct insn insn; + int insns = 0; + + prog = argv[0]; + if (argc > 1) + usage(); + + while (fgets(line, BUFSIZE, stdin)) { + char copy[BUFSIZE], *s, *tab1, *tab2; + int nb = 0; + unsigned int b; + + insns++; + memset(insn_buf, 0, 16); + strcpy(copy, line); + tab1 = strchr(copy, '\t'); + if (!tab1) + malformed_line(line, insns); + s = tab1 + 1; + s += strspn(s, " "); + tab2 = strchr(s, '\t'); + if (!tab2) + malformed_line(line, insns); + *tab2 = '\0'; /* Characters beyond tab2 aren't examined */ + while (s < tab2) { + if (sscanf(s, "%x", &b) == 1) { + insn_buf[nb++] = (unsigned char) b; + s += 3; + } else + break; + } + /* Decode an instruction */ +#ifdef __x86_64__ + insn_init(&insn, insn_buf, 1); +#else + insn_init(&insn, insn_buf, 0); +#endif + insn_get_length(&insn); + if (insn.length != nb) { + fprintf(stderr, "Error: %s", line); + fprintf(stderr, "Error: objdump says %d bytes, but " + "insn_get_length() says %d (attr:%x)\n", nb, + insn.length, insn.attr); + exit(2); + } + } + fprintf(stderr, "Succeed: decoded and checked %d instructions\n", + insns); + return 0; +} -- cgit v0.10.2 From b46b3d70c9c017d7c4ec49f7f3ffd0af5a622277 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:28 -0400 Subject: kprobes: Checks probe address is instruction boudary on x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure safeness of inserting kprobes by checking whether the specified address is at the first byte of an instruction on x86. This is done by decoding probed function from its head to the probe point. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203428.31965.21939.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d..aa15f3e 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -48,12 +48,14 @@ #include #include #include +#include #include #include #include #include #include +#include void jprobe_return_end(void); @@ -244,6 +246,75 @@ retry: } } +/* Recover the probed instruction at addr for further analysis. */ +static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + struct kprobe *kp; + kp = get_kprobe((void *)addr); + if (!kp) + return -EINVAL; + + /* + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, fix_riprel() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. + * + * On the other hand, kp->opcode has a copy of the first byte of + * the probed instruction, which is overwritten by int3. And + * the instruction at kp->addr is not modified by kprobes except + * for the first byte, we can recover the original instruction + * from it and kp->opcode. + */ + memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + buf[0] = kp->opcode; + return 0; +} + +/* Dummy buffers for kallsyms_lookup */ +static char __dummy_buf[KSYM_NAME_LEN]; + +/* Check if paddr is at an instruction boundary */ +static int __kprobes can_probe(unsigned long paddr) +{ + int ret; + unsigned long addr, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + kernel_insn_init(&insn, (void *)addr); + insn_get_opcode(&insn); + + /* + * Check if the instruction has been modified by another + * kprobe, in which case we replace the breakpoint by the + * original instruction in our buffer. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { + ret = recover_probed_instruction(buf, addr); + if (ret) + /* + * Another debugging subsystem might insert + * this breakpoint. In that case, we can't + * recover it. + */ + return 0; + kernel_insn_init(&insn, buf); + } + insn_get_length(&insn); + addr += insn.length; + } + + return (addr == paddr); +} + /* * Returns non-zero if opcode modifies the interrupt flag. */ @@ -359,6 +430,8 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + if (!can_probe((unsigned long)p->addr)) + return -EILSEQ; /* insn: must be on special executable page on x86. */ p->ainsn.insn = get_insn_slot(); if (!p->ainsn.insn) -- cgit v0.10.2 From 89ae465b0ee470f7d3f8a1c61353445c3acbbe2a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:36 -0400 Subject: kprobes: Cleanup fix_riprel() using insn decoder on x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup fix_riprel() in arch/x86/kernel/kprobes.c by using the new x86 instruction decoder instead of using comparisons with raw ad hoc numeric opcodes. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203436.31965.34374.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index aa15f3e..16ae961 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -108,50 +108,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { /* ----------------------------------------------- */ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ }; -static const u32 onebyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ - W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ - W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ - W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ - W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ - W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ - W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ - W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ - W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ - W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ - W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ - W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ - W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ - W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ - W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; -static const u32 twobyte_has_modrm[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ----------------------------------------------- */ - W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ - W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ - W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ - W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ - W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ - W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ - W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ - W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ - W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ - W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ - W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; #undef W struct kretprobe_blackpoint kretprobe_blacklist[] = { @@ -348,68 +304,30 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) static void __kprobes fix_riprel(struct kprobe *p) { #ifdef CONFIG_X86_64 - u8 *insn = p->ainsn.insn; - s64 disp; - int need_modrm; - - /* Skip legacy instruction prefixes. */ - while (1) { - switch (*insn) { - case 0x66: - case 0x67: - case 0x2e: - case 0x3e: - case 0x26: - case 0x64: - case 0x65: - case 0x36: - case 0xf0: - case 0xf3: - case 0xf2: - ++insn; - continue; - } - break; - } + struct insn insn; + kernel_insn_init(&insn, p->ainsn.insn); - /* Skip REX instruction prefix. */ - if (is_REX_prefix(insn)) - ++insn; - - if (*insn == 0x0f) { - /* Two-byte opcode. */ - ++insn; - need_modrm = test_bit(*insn, - (unsigned long *)twobyte_has_modrm); - } else - /* One-byte opcode. */ - need_modrm = test_bit(*insn, - (unsigned long *)onebyte_has_modrm); - - if (need_modrm) { - u8 modrm = *++insn; - if ((modrm & 0xc7) == 0x05) { - /* %rip+disp32 addressing mode */ - /* Displacement follows ModRM byte. */ - ++insn; - /* - * The copied instruction uses the %rip-relative - * addressing mode. Adjust the displacement for the - * difference between the original location of this - * instruction and the location of the copy that will - * actually be run. The tricky bit here is making sure - * that the sign extension happens correctly in this - * calculation, since we need a signed 32-bit result to - * be sign-extended to 64 bits when it's added to the - * %rip value and yield the same 64-bit result that the - * sign-extension of the original signed 32-bit - * displacement would have given. - */ - disp = (u8 *) p->addr + *((s32 *) insn) - - (u8 *) p->ainsn.insn; - BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ - *(s32 *)insn = (s32) disp; - } + if (insn_rip_relative(&insn)) { + s64 newdisp; + u8 *disp; + insn_get_displacement(&insn); + /* + * The copied instruction uses the %rip-relative addressing + * mode. Adjust the displacement for the difference between + * the original location of this instruction and the location + * of the copy that will actually be run. The tricky bit here + * is making sure that the sign extension happens correctly in + * this calculation, since we need a signed 32-bit result to + * be sign-extended to 64 bits when it's added to the %rip + * value and yield the same 64-bit result that the sign- + * extension of the original signed 32-bit displacement would + * have given. + */ + newdisp = (u8 *) p->addr + (s64) insn.displacement.value - + (u8 *) p->ainsn.insn; + BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ + disp = (u8 *) p->ainsn.insn + insn_offset_displacement(&insn); + *(s32 *) disp = (s32) newdisp; } #endif } -- cgit v0.10.2 From b1cf540f0e5278ecfe8532557e547d833ed269d7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:44 -0400 Subject: x86: Add pt_regs register and stack access APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add following APIs for accessing registers and stack entries from pt_regs. These APIs are required by kprobes-based event tracer on ftrace. Some other debugging tools might be able to use it too. - regs_query_register_offset(const char *name) Query the offset of "name" register. - regs_query_register_name(unsigned int offset) Query the name of register by its offset. - regs_get_register(struct pt_regs *regs, unsigned int offset) Get the value of a register by its offset. - regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) Check the address is in the kernel stack. - regs_get_kernel_stack_nth(struct pt_regs *reg, unsigned int nth) Get Nth entry of the kernel stack. (N >= 0) - regs_get_argument_nth(struct pt_regs *reg, unsigned int nth) Get Nth argument at function call. (N >= 0) Signed-off-by: Masami Hiramatsu Cc: linux-arch@vger.kernel.org Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203444.31965.26374.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 0f0d908..a3d49dd 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -7,6 +7,7 @@ #ifdef __KERNEL__ #include +#include #endif #ifndef __ASSEMBLY__ @@ -216,6 +217,67 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) return regs->sp; } +/* Query offset/name of register from its name/offset */ +extern int regs_query_register_offset(const char *name); +extern const char *regs_query_register_name(unsigned int offset); +#define MAX_REG_OFFSET (offsetof(struct pt_regs, ss)) + +/** + * regs_get_register() - get register value from its offset + * @regs: pt_regs from which register value is gotten. + * @offset: offset number of the register. + * + * regs_get_register returns the value of a register whose offset from @regs + * is @offset. The @offset is the offset of the register in struct pt_regs. + * If @offset is bigger than MAX_REG_OFFSET, this returns 0. + */ +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + return *(unsigned long *)((unsigned long)regs + offset); +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kenel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static inline int regs_within_kernel_stack(struct pt_regs *regs, + unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, + unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +/* Get Nth argument at function call */ +extern unsigned long regs_get_argument_nth(struct pt_regs *regs, + unsigned int n); + /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 8d7d5c9..a33a17d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -49,6 +49,118 @@ enum x86_regset { REGSET_IOPERM32, }; +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { +#ifdef CONFIG_X86_64 + REG_OFFSET_NAME(r15), + REG_OFFSET_NAME(r14), + REG_OFFSET_NAME(r13), + REG_OFFSET_NAME(r12), + REG_OFFSET_NAME(r11), + REG_OFFSET_NAME(r10), + REG_OFFSET_NAME(r9), + REG_OFFSET_NAME(r8), +#endif + REG_OFFSET_NAME(bx), + REG_OFFSET_NAME(cx), + REG_OFFSET_NAME(dx), + REG_OFFSET_NAME(si), + REG_OFFSET_NAME(di), + REG_OFFSET_NAME(bp), + REG_OFFSET_NAME(ax), +#ifdef CONFIG_X86_32 + REG_OFFSET_NAME(ds), + REG_OFFSET_NAME(es), + REG_OFFSET_NAME(fs), + REG_OFFSET_NAME(gs), +#endif + REG_OFFSET_NAME(orig_ax), + REG_OFFSET_NAME(ip), + REG_OFFSET_NAME(cs), + REG_OFFSET_NAME(flags), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(ss), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_query_register_name() - query register name from its offset + * @offset: the offset of a register in struct pt_regs. + * + * regs_query_register_name() returns the name of a register from its + * offset in struct pt_regs. If the @offset is invalid, this returns NULL; + */ +const char *regs_query_register_name(unsigned int offset) +{ + const struct pt_regs_offset *roff; + for (roff = regoffset_table; roff->name != NULL; roff++) + if (roff->offset == offset) + return roff->name; + return NULL; +} + +static const int arg_offs_table[] = { +#ifdef CONFIG_X86_32 + [0] = offsetof(struct pt_regs, ax), + [1] = offsetof(struct pt_regs, dx), + [2] = offsetof(struct pt_regs, cx) +#else /* CONFIG_X86_64 */ + [0] = offsetof(struct pt_regs, di), + [1] = offsetof(struct pt_regs, si), + [2] = offsetof(struct pt_regs, dx), + [3] = offsetof(struct pt_regs, cx), + [4] = offsetof(struct pt_regs, r8), + [5] = offsetof(struct pt_regs, r9) +#endif +}; + +/** + * regs_get_argument_nth() - get Nth argument at function call + * @regs: pt_regs which contains registers at function entry. + * @n: argument number. + * + * regs_get_argument_nth() returns @n th argument of a function call. + * Since usually the kernel stack will be changed right after function entry, + * you must use this at function entry. If the @n th entry is NOT in the + * kernel stack or pt_regs, this returns 0. + */ +unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) +{ + if (n < ARRAY_SIZE(arg_offs_table)) + return *((unsigned long *)regs + arg_offs_table[n]); + else { + /* + * The typical case: arg n is on the stack. + * (Note: stack[0] = return address, so skip it) + */ + n -= ARRAY_SIZE(arg_offs_table); + return regs_get_kernel_stack_nth(regs, 1 + n); + } +} + /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. -- cgit v0.10.2 From bd1a5c849bdcc5c89e4a6a18216cd2b9a7a8a78f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:34:53 -0400 Subject: tracing: Ftrace dynamic ftrace_event_call support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add dynamic ftrace_event_call support to ftrace. Trace engines can add new ftrace_event_call to ftrace on the fly. Each operator function of the call takes an ftrace_event_call data structure as an argument, because these functions may be shared among several ftrace_event_calls. Changes from v13: - Define remove_subsystem_dir() always (revirt a2ca5e03), because trace_remove_event_call() uses it. - Modify syscall tracer because of ftrace_event_call change. [fweisbec@gmail.com: Fixed conflict against latest tracing/core] Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203453.31965.71901.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ace2da9..1ab3089 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,12 +112,12 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void *); - void (*unregfunc)(void *); + int (*regfunc)(struct ftrace_event_call *); + void (*unregfunc)(struct ftrace_event_call *); int id; - int (*raw_init)(void); - int (*show_format)(struct ftrace_event_call *call, - struct trace_seq *s); + int (*raw_init)(struct ftrace_event_call *); + int (*show_format)(struct ftrace_event_call *, + struct trace_seq *); int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; @@ -147,11 +147,12 @@ enum { FILTER_PTR_STRING, }; -extern int trace_define_field(struct ftrace_event_call *call, - const char *type, const char *name, - int offset, int size, int is_signed, - int filter_type); extern int trace_define_common_fields(struct ftrace_event_call *call); +extern int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size, int is_signed, + int filter_type); +extern int trace_add_event_call(struct ftrace_event_call *call); +extern void trace_remove_event_call(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f124c89..646102e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -165,7 +165,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(void) \ + static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ num = syscall_name_to_nr("sys"#sname); \ @@ -202,7 +202,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(void) \ + static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ num = syscall_name_to_nr("sys"#sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 360a77a..f2bd7a8 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -434,7 +434,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * event_trace_printk(_RET_IP_, ": " ); * } * - * static int ftrace_reg_event_(void) + * static int ftrace_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -445,7 +445,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_event_); * } @@ -478,7 +478,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * trace_current_buffer_unlock_commit(event, irq_flags, pc); * } * - * static int ftrace_raw_reg_event_(void) + * static int ftrace_raw_reg_event_(struct ftrace_event_call *unused) * { * int ret; * @@ -489,7 +489,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * return ret; * } * - * static void ftrace_unreg_event_(void) + * static void ftrace_unreg_event_(struct ftrace_event_call *unused) * { * unregister_trace_(ftrace_raw_event_); * } @@ -498,7 +498,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ * .trace = ftrace_raw_output_, <-- stage 2 * }; * - * static int ftrace_raw_init_event_(void) + * static int ftrace_raw_init_event_(struct ftrace_event_call *unused) * { * int id; * @@ -592,7 +592,7 @@ static void ftrace_raw_event_##call(proto) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void *ptr) \ +static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ { \ int ret; \ \ @@ -603,7 +603,7 @@ static int ftrace_raw_reg_event_##call(void *ptr) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void *ptr) \ +static void ftrace_raw_unreg_event_##call(struct ftrace_event_call *unused)\ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ @@ -612,7 +612,7 @@ static struct trace_event ftrace_event_type_##call = { \ .trace = ftrace_raw_output_##call, \ }; \ \ -static int ftrace_raw_init_event_##call(void) \ +static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ { \ int id; \ \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283b..e290b86 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -39,16 +39,19 @@ void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; -extern int reg_event_syscall_enter(void *ptr); -extern void unreg_event_syscall_enter(void *ptr); -extern int reg_event_syscall_exit(void *ptr); -extern void unreg_event_syscall_exit(void *ptr); + extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); +extern int reg_event_syscall_enter(struct ftrace_event_call *call); +extern void unreg_event_syscall_enter(struct ftrace_event_call *call); +extern int reg_event_syscall_exit(struct ftrace_event_call *call); +extern void unreg_event_syscall_exit(struct ftrace_event_call *call); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d33bcde..8079bb5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); -int trace_define_field(struct ftrace_event_call *call, const char *type, - const char *name, int offset, int size, int is_signed, +int trace_define_field(struct ftrace_event_call *call, char *type, + char *name, int offset, int size, int is_signed, int filter_type) { struct ftrace_event_field *field; @@ -92,9 +92,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) } EXPORT_SYMBOL_GPL(trace_define_common_fields); -#ifdef CONFIG_MODULES - -static void trace_destroy_fields(struct ftrace_event_call *call) +void trace_destroy_fields(struct ftrace_event_call *call) { struct ftrace_event_field *field, *next; @@ -106,8 +104,6 @@ static void trace_destroy_fields(struct ftrace_event_call *call) } } -#endif /* CONFIG_MODULES */ - static void ftrace_event_enable_disable(struct ftrace_event_call *call, int enable) { @@ -116,14 +112,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, if (call->enabled) { call->enabled = 0; tracing_stop_cmdline_record(); - call->unregfunc(call->data); + call->unregfunc(call); } break; case 1: if (!call->enabled) { call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(call->data); + call->regfunc(call); } break; } @@ -991,27 +987,43 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, return 0; } -#define for_each_event(event, start, end) \ - for (event = start; \ - (unsigned long)event < (unsigned long)end; \ - event++) +static int __trace_add_event_call(struct ftrace_event_call *call) +{ + struct dentry *d_events; + int ret; -#ifdef CONFIG_MODULES + if (!call->name) + return -EINVAL; -static LIST_HEAD(ftrace_module_file_list); + if (call->raw_init) { + ret = call->raw_init(call); + if (ret < 0) { + if (ret != -ENOSYS) + pr_warning("Could not initialize trace " + "events/%s\n", call->name); + return ret; + } + } -/* - * Modules must own their file_operations to keep up with - * reference counting. - */ -struct ftrace_module_file_ops { - struct list_head list; - struct module *mod; - struct file_operations id; - struct file_operations enable; - struct file_operations format; - struct file_operations filter; -}; + d_events = event_trace_events_dir(); + if (!d_events) + return -ENOENT; + + list_add(&call->list, &ftrace_events); + return event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, &ftrace_event_filter_fops, + &ftrace_event_format_fops); +} + +/* Add an additional event_call dynamically */ +int trace_add_event_call(struct ftrace_event_call *call) +{ + int ret; + mutex_lock(&event_mutex); + ret = __trace_add_event_call(call); + mutex_unlock(&event_mutex); + return ret; +} static void remove_subsystem_dir(const char *name) { @@ -1039,6 +1051,48 @@ static void remove_subsystem_dir(const char *name) } } +static void __trace_remove_event_call(struct ftrace_event_call *call) +{ + ftrace_event_enable_disable(call, 0); + if (call->event) + __unregister_ftrace_event(call->event); + debugfs_remove_recursive(call->dir); + list_del(&call->list); + trace_destroy_fields(call); + destroy_preds(call); + remove_subsystem_dir(call->system); +} + +/* Remove an event_call */ +void trace_remove_event_call(struct ftrace_event_call *call) +{ + mutex_lock(&event_mutex); + __trace_remove_event_call(call); + mutex_unlock(&event_mutex); +} + +#define for_each_event(event, start, end) \ + for (event = start; \ + (unsigned long)event < (unsigned long)end; \ + event++) + +#ifdef CONFIG_MODULES + +static LIST_HEAD(ftrace_module_file_list); + +/* + * Modules must own their file_operations to keep up with + * reference counting. + */ +struct ftrace_module_file_ops { + struct list_head list; + struct module *mod; + struct file_operations id; + struct file_operations enable; + struct file_operations format; + struct file_operations filter; +}; + static struct ftrace_module_file_ops * trace_create_file_ops(struct module *mod) { @@ -1096,7 +1150,7 @@ static void trace_module_add_events(struct module *mod) if (!call->name) continue; if (call->raw_init) { - ret = call->raw_init(); + ret = call->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) pr_warning("Could not initialize trace " @@ -1131,14 +1185,7 @@ static void trace_module_remove_events(struct module *mod) list_for_each_entry_safe(call, p, &ftrace_events, list) { if (call->mod == mod) { found = true; - ftrace_event_enable_disable(call, 0); - if (call->event) - __unregister_ftrace_event(call->event); - debugfs_remove_recursive(call->dir); - list_del(&call->list); - trace_destroy_fields(call); - destroy_preds(call); - remove_subsystem_dir(call->system); + __trace_remove_event_call(call); } } @@ -1256,7 +1303,7 @@ static __init int event_trace_init(void) if (!call->name) continue; if (call->raw_init) { - ret = call->raw_init(); + ret = call->raw_init(call); if (ret < 0) { if (ret != -ENOSYS) pr_warning("Could not initialize trace " diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 029a91f..9cbe7f1 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -117,10 +117,16 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \ cmd; +static int ftrace_raw_init_event(struct ftrace_event_call *event_call) +{ + INIT_LIST_HEAD(&event_call->fields); + init_preds(event_call); + return 0; +} + #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \ -static int ftrace_raw_init_event_##call(void); \ \ struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -128,16 +134,10 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ .id = proto, \ .system = __stringify(TRACE_SYSTEM), \ - .raw_init = ftrace_raw_init_event_##call, \ + .raw_init = ftrace_raw_init_event, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ -}; \ -static int ftrace_raw_init_event_##call(void) \ -{ \ - INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ - return 0; \ -} \ +}; #undef TRACE_EVENT_FORMAT_NOFILTER #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 85291c4..5931933 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -193,8 +193,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) return ret; for (i = 0; i < meta->nb_args; i++) { - ret = trace_define_field(call, meta->types[i], - meta->args[i], offset, + ret = trace_define_field(call, (char *)meta->types[i], + (char *)meta->args[i], offset, sizeof(unsigned long), 0, FILTER_OTHER); offset += sizeof(unsigned long); @@ -277,13 +277,13 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) trace_current_buffer_unlock_commit(event, 0, 0); } -int reg_event_syscall_enter(void *ptr) +int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return -ENOSYS; @@ -301,12 +301,12 @@ int reg_event_syscall_enter(void *ptr) return ret; } -void unreg_event_syscall_enter(void *ptr) +void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return; @@ -318,13 +318,13 @@ void unreg_event_syscall_enter(void *ptr) mutex_unlock(&syscall_trace_lock); } -int reg_event_syscall_exit(void *ptr) +int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return -ENOSYS; @@ -342,12 +342,12 @@ int reg_event_syscall_exit(void *ptr) return ret; } -void unreg_event_syscall_exit(void *ptr) +void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; char *name; - name = (char *)ptr; + name = (char *)call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return; -- cgit v0.10.2 From d93f12f3f417e49a175800da85c6fcb2a5096e03 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:35:01 -0400 Subject: tracing: Introduce TRACE_FIELD_ZERO() macro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use TRACE_FIELD_ZERO(type, item) instead of TRACE_FIELD_ZERO_CHAR(item). This also includes a typo fix of TRACE_ZERO_CHAR() macro. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203501.31965.30172.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index 6db005e..e74f090 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -109,7 +109,7 @@ TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(char *, fmt, fmt) - TRACE_FIELD_ZERO_CHAR(buf) + TRACE_FIELD_ZERO(char, buf) ), TP_RAW_FMT("%08lx (%d) fmt:%p %s") ); @@ -117,7 +117,7 @@ TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD_ZERO_CHAR(buf) + TRACE_FIELD_ZERO(char, buf) ), TP_RAW_FMT("%08lx (%d) fmt:%p %s") ); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9cbe7f1..f75faec 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -42,9 +42,9 @@ extern void __bad_type_size(void); if (!ret) \ return 0; -#undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ +#undef TRACE_FIELD_ZERO +#define TRACE_FIELD_ZERO(type, item) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ "offset:%u;\tsize:0;\n", \ (unsigned int)offsetof(typeof(field), item)); \ if (!ret) \ @@ -92,9 +92,6 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #include "trace_event_types.h" -#undef TRACE_ZERO_CHAR -#define TRACE_ZERO_CHAR(arg) - #undef TRACE_FIELD #define TRACE_FIELD(type, item, assign)\ entry->item = assign; @@ -107,6 +104,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ TRACE_FIELD(type, item, assign) +#undef TRACE_FIELD_ZERO +#define TRACE_FIELD_ZERO(type, item) + #undef TP_CMD #define TP_CMD(cmd...) cmd @@ -180,8 +180,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ if (ret) \ return ret; -#undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) +#undef TRACE_FIELD_ZERO +#define TRACE_FIELD_ZERO(type, item) #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -- cgit v0.10.2 From 413d37d1eb69c1765b9ace0a612dac9b6c990e66 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:35:11 -0400 Subject: tracing: Add kprobe-based event tracer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add kprobes-based event tracer on ftrace. This tracer is similar to the events tracer which is based on Tracepoint infrastructure. Instead of Tracepoint, this tracer is based on kprobes (kprobe and kretprobe). It probes anywhere where kprobes can probe(this means, all functions body except for __kprobes functions). Similar to the events tracer, this tracer doesn't need to be activated via current_tracer, instead of that, just set probe points via /sys/kernel/debug/tracing/kprobe_events. And you can set filters on each probe events via /sys/kernel/debug/tracing/events/kprobes//filter. This tracer supports following probe arguments for each probe. %REG : Fetch register REG sN : Fetch Nth entry of stack (N >= 0) sa : Fetch stack address. @ADDR : Fetch memory at ADDR (ADDR should be in kernel) @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) aN : Fetch function argument. (N >= 0) rv : Fetch return value. ra : Fetch return address. +|-offs(FETCHARG) : fetch memory at FETCHARG +|- offs address. See Documentation/trace/kprobetrace.txt in the next patch for details. Changes from v13: - Support 'sa' for stack address. - Use call->data instead of container_of() macro. [fweisbec@gmail.com: Fixed conflict against latest tracing/core] Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203510.31965.29123.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 06be85a..fb5fbf7 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -411,6 +411,18 @@ config BLK_DEV_IO_TRACE If unsure, say N. +config KPROBE_TRACER + depends on KPROBES + depends on X86 + bool "Trace kprobes" + select TRACING + select GENERIC_TRACER + help + This tracer probes everywhere where kprobes can probe it, and + records various registers and memories specified by user. + This also allows you to trace kprobe probe points as a dynamic + defined events. It provides per-probe event filtering interface. + config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" depends on FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 844164d..7c00a1e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -54,5 +54,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o +obj-$(CONFIG_KPROBE_TRACER) += trace_kprobe.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 654fd65..667f832 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -38,6 +38,8 @@ enum trace_type { TRACE_KMEM_FREE, TRACE_POWER, TRACE_BLK, + TRACE_KPROBE, + TRACE_KRETPROBE, __TRACE_LAST_TYPE, }; @@ -205,6 +207,30 @@ struct syscall_trace_exit { unsigned long ret; }; +struct kprobe_trace_entry { + struct trace_entry ent; + unsigned long ip; + int nargs; + unsigned long args[]; +}; + +#define SIZEOF_KPROBE_TRACE_ENTRY(n) \ + (offsetof(struct kprobe_trace_entry, args) + \ + (sizeof(unsigned long) * (n))) + +struct kretprobe_trace_entry { + struct trace_entry ent; + unsigned long func; + unsigned long ret_ip; + int nargs; + unsigned long args[]; +}; + +#define SIZEOF_KRETPROBE_TRACE_ENTRY(n) \ + (offsetof(struct kretprobe_trace_entry, args) + \ + (sizeof(unsigned long) * (n))) + + /* * trace_flag_type is an enumeration that holds different @@ -317,6 +343,10 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ + IF_ASSIGN(var, ent, struct kprobe_trace_entry, \ + TRACE_KPROBE); \ + IF_ASSIGN(var, ent, struct kretprobe_trace_entry, \ + TRACE_KRETPROBE); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index e74f090..186b598 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -175,4 +175,22 @@ TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, TP_RAW_FMT("type:%u call_site:%lx ptr:%p") ); +TRACE_EVENT_FORMAT(kprobe, TRACE_KPROBE, kprobe_trace_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, ip, ip) + TRACE_FIELD(int, nargs, nargs) + TRACE_FIELD_ZERO(unsigned long, args) + ), + TP_RAW_FMT("%08lx: args:0x%lx ...") +); + +TRACE_EVENT_FORMAT(kretprobe, TRACE_KRETPROBE, kretprobe_trace_entry, ignore, + TRACE_STRUCT( + TRACE_FIELD(unsigned long, func, func) + TRACE_FIELD(unsigned long, ret_ip, ret_ip) + TRACE_FIELD(int, nargs, nargs) + TRACE_FIELD_ZERO(unsigned long, args) + ), + TP_RAW_FMT("%08lx <- %08lx: args:0x%lx ...") +); #undef TRACE_SYSTEM diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c new file mode 100644 index 0000000..0c4f00a --- /dev/null +++ b/kernel/trace/trace_kprobe.c @@ -0,0 +1,1202 @@ +/* + * kprobe based kernel tracer + * + * Created by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "trace.h" +#include "trace_output.h" + +#define TRACE_KPROBE_ARGS 6 +#define MAX_ARGSTR_LEN 63 + +/* currently, trace_kprobe only supports X86. */ + +struct fetch_func { + unsigned long (*func)(struct pt_regs *, void *); + void *data; +}; + +static __kprobes unsigned long call_fetch(struct fetch_func *f, + struct pt_regs *regs) +{ + return f->func(regs, f->data); +} + +/* fetch handlers */ +static __kprobes unsigned long fetch_register(struct pt_regs *regs, + void *offset) +{ + return regs_get_register(regs, (unsigned int)((unsigned long)offset)); +} + +static __kprobes unsigned long fetch_stack(struct pt_regs *regs, + void *num) +{ + return regs_get_kernel_stack_nth(regs, + (unsigned int)((unsigned long)num)); +} + +static __kprobes unsigned long fetch_memory(struct pt_regs *regs, void *addr) +{ + unsigned long retval; + + if (probe_kernel_address(addr, retval)) + return 0; + return retval; +} + +static __kprobes unsigned long fetch_argument(struct pt_regs *regs, void *num) +{ + return regs_get_argument_nth(regs, (unsigned int)((unsigned long)num)); +} + +static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, + void *dummy) +{ + return regs_return_value(regs); +} + +static __kprobes unsigned long fetch_ip(struct pt_regs *regs, void *dummy) +{ + return instruction_pointer(regs); +} + +static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, + void *dummy) +{ + return kernel_stack_pointer(regs); +} + +/* Memory fetching by symbol */ +struct symbol_cache { + char *symbol; + long offset; + unsigned long addr; +}; + +static unsigned long update_symbol_cache(struct symbol_cache *sc) +{ + sc->addr = (unsigned long)kallsyms_lookup_name(sc->symbol); + if (sc->addr) + sc->addr += sc->offset; + return sc->addr; +} + +static void free_symbol_cache(struct symbol_cache *sc) +{ + kfree(sc->symbol); + kfree(sc); +} + +static struct symbol_cache *alloc_symbol_cache(const char *sym, long offset) +{ + struct symbol_cache *sc; + + if (!sym || strlen(sym) == 0) + return NULL; + sc = kzalloc(sizeof(struct symbol_cache), GFP_KERNEL); + if (!sc) + return NULL; + + sc->symbol = kstrdup(sym, GFP_KERNEL); + if (!sc->symbol) { + kfree(sc); + return NULL; + } + sc->offset = offset; + + update_symbol_cache(sc); + return sc; +} + +static __kprobes unsigned long fetch_symbol(struct pt_regs *regs, void *data) +{ + struct symbol_cache *sc = data; + + if (sc->addr) + return fetch_memory(regs, (void *)sc->addr); + else + return 0; +} + +/* Special indirect memory access interface */ +struct indirect_fetch_data { + struct fetch_func orig; + long offset; +}; + +static __kprobes unsigned long fetch_indirect(struct pt_regs *regs, void *data) +{ + struct indirect_fetch_data *ind = data; + unsigned long addr; + + addr = call_fetch(&ind->orig, regs); + if (addr) { + addr += ind->offset; + return fetch_memory(regs, (void *)addr); + } else + return 0; +} + +static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) +{ + if (data->orig.func == fetch_indirect) + free_indirect_fetch_data(data->orig.data); + else if (data->orig.func == fetch_symbol) + free_symbol_cache(data->orig.data); + kfree(data); +} + +/** + * kprobe_trace_core + */ + +struct trace_probe { + struct list_head list; + union { + struct kprobe kp; + struct kretprobe rp; + }; + const char *symbol; /* symbol name */ + unsigned int nr_args; + struct fetch_func args[TRACE_KPROBE_ARGS]; + struct ftrace_event_call call; +}; + +static int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs); +static int kretprobe_trace_func(struct kretprobe_instance *ri, + struct pt_regs *regs); + +static __kprobes int probe_is_return(struct trace_probe *tp) +{ + return (tp->rp.handler == kretprobe_trace_func); +} + +static __kprobes const char *probe_symbol(struct trace_probe *tp) +{ + return tp->symbol ? tp->symbol : "unknown"; +} + +static __kprobes long probe_offset(struct trace_probe *tp) +{ + return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset; +} + +static __kprobes void *probe_address(struct trace_probe *tp) +{ + return (probe_is_return(tp)) ? tp->rp.kp.addr : tp->kp.addr; +} + +static int trace_arg_string(char *buf, size_t n, struct fetch_func *ff) +{ + int ret = -EINVAL; + + if (ff->func == fetch_argument) + ret = snprintf(buf, n, "a%lu", (unsigned long)ff->data); + else if (ff->func == fetch_register) { + const char *name; + name = regs_query_register_name((unsigned int)((long)ff->data)); + ret = snprintf(buf, n, "%%%s", name); + } else if (ff->func == fetch_stack) + ret = snprintf(buf, n, "s%lu", (unsigned long)ff->data); + else if (ff->func == fetch_memory) + ret = snprintf(buf, n, "@0x%p", ff->data); + else if (ff->func == fetch_symbol) { + struct symbol_cache *sc = ff->data; + ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset); + } else if (ff->func == fetch_retvalue) + ret = snprintf(buf, n, "rv"); + else if (ff->func == fetch_ip) + ret = snprintf(buf, n, "ra"); + else if (ff->func == fetch_stack_address) + ret = snprintf(buf, n, "sa"); + else if (ff->func == fetch_indirect) { + struct indirect_fetch_data *id = ff->data; + size_t l = 0; + ret = snprintf(buf, n, "%+ld(", id->offset); + if (ret >= n) + goto end; + l += ret; + ret = trace_arg_string(buf + l, n - l, &id->orig); + if (ret < 0) + goto end; + l += ret; + ret = snprintf(buf + l, n - l, ")"); + ret += l; + } +end: + if (ret >= n) + return -ENOSPC; + return ret; +} + +static int register_probe_event(struct trace_probe *tp); +static void unregister_probe_event(struct trace_probe *tp); + +static DEFINE_MUTEX(probe_lock); +static LIST_HEAD(probe_list); + +static struct trace_probe *alloc_trace_probe(const char *symbol, + const char *event) +{ + struct trace_probe *tp; + + tp = kzalloc(sizeof(struct trace_probe), GFP_KERNEL); + if (!tp) + return ERR_PTR(-ENOMEM); + + if (symbol) { + tp->symbol = kstrdup(symbol, GFP_KERNEL); + if (!tp->symbol) + goto error; + } + if (event) { + tp->call.name = kstrdup(event, GFP_KERNEL); + if (!tp->call.name) + goto error; + } + + INIT_LIST_HEAD(&tp->list); + return tp; +error: + kfree(tp->symbol); + kfree(tp); + return ERR_PTR(-ENOMEM); +} + +static void free_trace_probe(struct trace_probe *tp) +{ + int i; + + for (i = 0; i < tp->nr_args; i++) + if (tp->args[i].func == fetch_symbol) + free_symbol_cache(tp->args[i].data); + else if (tp->args[i].func == fetch_indirect) + free_indirect_fetch_data(tp->args[i].data); + + kfree(tp->call.name); + kfree(tp->symbol); + kfree(tp); +} + +static struct trace_probe *find_probe_event(const char *event) +{ + struct trace_probe *tp; + + list_for_each_entry(tp, &probe_list, list) + if (tp->call.name && !strcmp(tp->call.name, event)) + return tp; + return NULL; +} + +static void __unregister_trace_probe(struct trace_probe *tp) +{ + if (probe_is_return(tp)) + unregister_kretprobe(&tp->rp); + else + unregister_kprobe(&tp->kp); +} + +/* Unregister a trace_probe and probe_event: call with locking probe_lock */ +static void unregister_trace_probe(struct trace_probe *tp) +{ + if (tp->call.name) + unregister_probe_event(tp); + __unregister_trace_probe(tp); + list_del(&tp->list); +} + +/* Register a trace_probe and probe_event */ +static int register_trace_probe(struct trace_probe *tp) +{ + struct trace_probe *old_tp; + int ret; + + mutex_lock(&probe_lock); + + if (probe_is_return(tp)) + ret = register_kretprobe(&tp->rp); + else + ret = register_kprobe(&tp->kp); + + if (ret) { + pr_warning("Could not insert probe(%d)\n", ret); + if (ret == -EILSEQ) { + pr_warning("Probing address(0x%p) is not an " + "instruction boundary.\n", + probe_address(tp)); + ret = -EINVAL; + } + goto end; + } + /* register as an event */ + if (tp->call.name) { + old_tp = find_probe_event(tp->call.name); + if (old_tp) { + /* delete old event */ + unregister_trace_probe(old_tp); + free_trace_probe(old_tp); + } + ret = register_probe_event(tp); + if (ret) { + pr_warning("Faild to register probe event(%d)\n", ret); + __unregister_trace_probe(tp); + } + } + list_add_tail(&tp->list, &probe_list); +end: + mutex_unlock(&probe_lock); + return ret; +} + +/* Split symbol and offset. */ +static int split_symbol_offset(char *symbol, long *offset) +{ + char *tmp; + int ret; + + if (!offset) + return -EINVAL; + + tmp = strchr(symbol, '+'); + if (!tmp) + tmp = strchr(symbol, '-'); + + if (tmp) { + /* skip sign because strict_strtol doesn't accept '+' */ + ret = strict_strtol(tmp + 1, 0, offset); + if (ret) + return ret; + if (*tmp == '-') + *offset = -(*offset); + *tmp = '\0'; + } else + *offset = 0; + return 0; +} + +#define PARAM_MAX_ARGS 16 +#define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) + +static int parse_trace_arg(char *arg, struct fetch_func *ff, int is_return) +{ + int ret = 0; + unsigned long param; + long offset; + char *tmp; + + switch (arg[0]) { + case 'a': /* argument */ + ret = strict_strtoul(arg + 1, 10, ¶m); + if (ret || param > PARAM_MAX_ARGS) + ret = -EINVAL; + else { + ff->func = fetch_argument; + ff->data = (void *)param; + } + break; + case 'r': /* retval or retaddr */ + if (is_return && arg[1] == 'v') { + ff->func = fetch_retvalue; + ff->data = NULL; + } else if (is_return && arg[1] == 'a') { + ff->func = fetch_ip; + ff->data = NULL; + } else + ret = -EINVAL; + break; + case '%': /* named register */ + ret = regs_query_register_offset(arg + 1); + if (ret >= 0) { + ff->func = fetch_register; + ff->data = (void *)(unsigned long)ret; + ret = 0; + } + break; + case 's': /* stack */ + if (arg[1] == 'a') { + ff->func = fetch_stack_address; + ff->data = NULL; + } else { + ret = strict_strtoul(arg + 1, 10, ¶m); + if (ret || param > PARAM_MAX_STACK) + ret = -EINVAL; + else { + ff->func = fetch_stack; + ff->data = (void *)param; + } + } + break; + case '@': /* memory or symbol */ + if (isdigit(arg[1])) { + ret = strict_strtoul(arg + 1, 0, ¶m); + if (ret) + break; + ff->func = fetch_memory; + ff->data = (void *)param; + } else { + ret = split_symbol_offset(arg + 1, &offset); + if (ret) + break; + ff->data = alloc_symbol_cache(arg + 1, + offset); + if (ff->data) + ff->func = fetch_symbol; + else + ret = -EINVAL; + } + break; + case '+': /* indirect memory */ + case '-': + tmp = strchr(arg, '('); + if (!tmp) { + ret = -EINVAL; + break; + } + *tmp = '\0'; + ret = strict_strtol(arg + 1, 0, &offset); + if (ret) + break; + if (arg[0] == '-') + offset = -offset; + arg = tmp + 1; + tmp = strrchr(arg, ')'); + if (tmp) { + struct indirect_fetch_data *id; + *tmp = '\0'; + id = kzalloc(sizeof(struct indirect_fetch_data), + GFP_KERNEL); + if (!id) + return -ENOMEM; + id->offset = offset; + ret = parse_trace_arg(arg, &id->orig, is_return); + if (ret) + kfree(id); + else { + ff->func = fetch_indirect; + ff->data = (void *)id; + } + } else + ret = -EINVAL; + break; + default: + /* TODO: support custom handler */ + ret = -EINVAL; + } + return ret; +} + +static int create_trace_probe(int argc, char **argv) +{ + /* + * Argument syntax: + * - Add kprobe: p[:EVENT] SYMBOL[+OFFS|-OFFS]|ADDRESS [FETCHARGS] + * - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS] + * Fetch args: + * aN : fetch Nth of function argument. (N:0-) + * rv : fetch return value + * ra : fetch return address + * sa : fetch stack address + * sN : fetch Nth of stack (N:0-) + * @ADDR : fetch memory at ADDR (ADDR should be in kernel) + * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) + * %REG : fetch register REG + * Indirect memory fetch: + * +|-offs(ARG) : fetch memory at ARG +|- offs address. + */ + struct trace_probe *tp; + struct kprobe *kp; + int i, ret = 0; + int is_return = 0; + char *symbol = NULL, *event = NULL; + long offset = 0; + void *addr = NULL; + + if (argc < 2) + return -EINVAL; + + if (argv[0][0] == 'p') + is_return = 0; + else if (argv[0][0] == 'r') + is_return = 1; + else + return -EINVAL; + + if (argv[0][1] == ':') { + event = &argv[0][2]; + if (strlen(event) == 0) { + pr_info("Event name is not specifiled\n"); + return -EINVAL; + } + } + + if (isdigit(argv[1][0])) { + if (is_return) + return -EINVAL; + /* an address specified */ + ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); + if (ret) + return ret; + } else { + /* a symbol specified */ + symbol = argv[1]; + /* TODO: support .init module functions */ + ret = split_symbol_offset(symbol, &offset); + if (ret) + return ret; + if (offset && is_return) + return -EINVAL; + } + + /* setup a probe */ + tp = alloc_trace_probe(symbol, event); + if (IS_ERR(tp)) + return PTR_ERR(tp); + + if (is_return) { + kp = &tp->rp.kp; + tp->rp.handler = kretprobe_trace_func; + } else { + kp = &tp->kp; + tp->kp.pre_handler = kprobe_trace_func; + } + + if (tp->symbol) { + kp->symbol_name = tp->symbol; + kp->offset = offset; + } else + kp->addr = addr; + + /* parse arguments */ + argc -= 2; argv += 2; ret = 0; + for (i = 0; i < argc && i < TRACE_KPROBE_ARGS; i++) { + if (strlen(argv[i]) > MAX_ARGSTR_LEN) { + pr_info("Argument%d(%s) is too long.\n", i, argv[i]); + ret = -ENOSPC; + goto error; + } + ret = parse_trace_arg(argv[i], &tp->args[i], is_return); + if (ret) + goto error; + } + tp->nr_args = i; + + ret = register_trace_probe(tp); + if (ret) + goto error; + return 0; + +error: + free_trace_probe(tp); + return ret; +} + +static void cleanup_all_probes(void) +{ + struct trace_probe *tp; + + mutex_lock(&probe_lock); + /* TODO: Use batch unregistration */ + while (!list_empty(&probe_list)) { + tp = list_entry(probe_list.next, struct trace_probe, list); + unregister_trace_probe(tp); + free_trace_probe(tp); + } + mutex_unlock(&probe_lock); +} + + +/* Probes listing interfaces */ +static void *probes_seq_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&probe_lock); + return seq_list_start(&probe_list, *pos); +} + +static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + return seq_list_next(v, &probe_list, pos); +} + +static void probes_seq_stop(struct seq_file *m, void *v) +{ + mutex_unlock(&probe_lock); +} + +static int probes_seq_show(struct seq_file *m, void *v) +{ + struct trace_probe *tp = v; + int i, ret; + char buf[MAX_ARGSTR_LEN + 1]; + + seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); + if (tp->call.name) + seq_printf(m, ":%s", tp->call.name); + + if (tp->symbol) + seq_printf(m, " %s%+ld", probe_symbol(tp), probe_offset(tp)); + else + seq_printf(m, " 0x%p", probe_address(tp)); + + for (i = 0; i < tp->nr_args; i++) { + ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + if (ret < 0) { + pr_warning("Argument%d decoding error(%d).\n", i, ret); + return ret; + } + seq_printf(m, " %s", buf); + } + seq_printf(m, "\n"); + return 0; +} + +static const struct seq_operations probes_seq_op = { + .start = probes_seq_start, + .next = probes_seq_next, + .stop = probes_seq_stop, + .show = probes_seq_show +}; + +static int probes_open(struct inode *inode, struct file *file) +{ + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + cleanup_all_probes(); + + return seq_open(file, &probes_seq_op); +} + +static int command_trace_probe(const char *buf) +{ + char **argv; + int argc = 0, ret = 0; + + argv = argv_split(GFP_KERNEL, buf, &argc); + if (!argv) + return -ENOMEM; + + if (argc) + ret = create_trace_probe(argc, argv); + + argv_free(argv); + return ret; +} + +#define WRITE_BUFSIZE 128 + +static ssize_t probes_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + char *kbuf, *tmp; + int ret; + size_t done; + size_t size; + + kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; + + ret = done = 0; + while (done < count) { + size = count - done; + if (size >= WRITE_BUFSIZE) + size = WRITE_BUFSIZE - 1; + if (copy_from_user(kbuf, buffer + done, size)) { + ret = -EFAULT; + goto out; + } + kbuf[size] = '\0'; + tmp = strchr(kbuf, '\n'); + if (tmp) { + *tmp = '\0'; + size = tmp - kbuf + 1; + } else if (done + size < count) { + pr_warning("Line length is too long: " + "Should be less than %d.", WRITE_BUFSIZE); + ret = -EINVAL; + goto out; + } + done += size; + /* Remove comments */ + tmp = strchr(kbuf, '#'); + if (tmp) + *tmp = '\0'; + + ret = command_trace_probe(kbuf); + if (ret) + goto out; + } + ret = done; +out: + kfree(kbuf); + return ret; +} + +static const struct file_operations kprobe_events_ops = { + .owner = THIS_MODULE, + .open = probes_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = probes_write, +}; + +/* Kprobe handler */ +static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, kp); + struct kprobe_trace_entry *entry; + struct ring_buffer_event *event; + int size, i, pc; + unsigned long irq_flags; + struct ftrace_event_call *call = &event_kprobe; + + if (&tp->call.name) + call = &tp->call; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + + event = trace_current_buffer_lock_reserve(TRACE_KPROBE, size, + irq_flags, pc); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->nargs = tp->nr_args; + entry->ip = (unsigned long)kp->addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i], regs); + + if (!filter_current_check_discard(call, entry, event)) + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); + return 0; +} + +/* Kretprobe handler */ +static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + struct kretprobe_trace_entry *entry; + struct ring_buffer_event *event; + int size, i, pc; + unsigned long irq_flags; + struct ftrace_event_call *call = &event_kretprobe; + + if (&tp->call.name) + call = &tp->call; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + + event = trace_current_buffer_lock_reserve(TRACE_KRETPROBE, size, + irq_flags, pc); + if (!event) + return 0; + + entry = ring_buffer_event_data(event); + entry->nargs = tp->nr_args; + entry->func = (unsigned long)probe_address(tp); + entry->ret_ip = (unsigned long)ri->ret_addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i], regs); + + if (!filter_current_check_discard(call, entry, event)) + trace_nowake_buffer_unlock_commit(event, irq_flags, pc); + + return 0; +} + +/* Event entry printers */ +enum print_line_t +print_kprobe_event(struct trace_iterator *iter, int flags) +{ + struct kprobe_trace_entry *field; + struct trace_seq *s = &iter->seq; + int i; + + trace_assign_type(field, iter->ent); + + if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, ":")) + goto partial; + + for (i = 0; i < field->nargs; i++) + if (!trace_seq_printf(s, " 0x%lx", field->args[i])) + goto partial; + + if (!trace_seq_puts(s, "\n")) + goto partial; + + return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +enum print_line_t +print_kretprobe_event(struct trace_iterator *iter, int flags) +{ + struct kretprobe_trace_entry *field; + struct trace_seq *s = &iter->seq; + int i; + + trace_assign_type(field, iter->ent); + + if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, " <- ")) + goto partial; + + if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) + goto partial; + + if (!trace_seq_puts(s, ":")) + goto partial; + + for (i = 0; i < field->nargs; i++) + if (!trace_seq_printf(s, " 0x%lx", field->args[i])) + goto partial; + + if (!trace_seq_puts(s, "\n")) + goto partial; + + return TRACE_TYPE_HANDLED; +partial: + return TRACE_TYPE_PARTIAL_LINE; +} + +static struct trace_event kprobe_trace_event = { + .type = TRACE_KPROBE, + .trace = print_kprobe_event, +}; + +static struct trace_event kretprobe_trace_event = { + .type = TRACE_KRETPROBE, + .trace = print_kretprobe_event, +}; + +static int probe_event_enable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + if (probe_is_return(tp)) + return enable_kretprobe(&tp->rp); + else + return enable_kprobe(&tp->kp); +} + +static void probe_event_disable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + if (probe_is_return(tp)) + disable_kretprobe(&tp->rp); + else + disable_kprobe(&tp->kp); +} + +static int probe_event_raw_init(struct ftrace_event_call *event_call) +{ + INIT_LIST_HEAD(&event_call->fields); + init_preds(event_call); + return 0; +} + +#undef DEFINE_FIELD +#define DEFINE_FIELD(type, item, name, is_signed) \ + do { \ + ret = trace_define_field(event_call, #type, name, \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed, \ + FILTER_OTHER); \ + if (ret) \ + return ret; \ + } while (0) + +static int kprobe_event_define_fields(struct ftrace_event_call *event_call) +{ + int ret, i; + struct kprobe_trace_entry field; + char buf[MAX_ARGSTR_LEN + 1]; + struct trace_probe *tp = (struct trace_probe *)event_call->data; + + ret = trace_define_common_fields(event_call); + if (!ret) + return ret; + + DEFINE_FIELD(unsigned long, ip, "ip", 0); + DEFINE_FIELD(int, nargs, "nargs", 1); + for (i = 0; i < tp->nr_args; i++) { + /* Set argN as a field */ + sprintf(buf, "arg%d", i); + DEFINE_FIELD(unsigned long, args[i], buf, 0); + /* Set argument string as an alias field */ + ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + if (ret < 0) + return ret; + DEFINE_FIELD(unsigned long, args[i], buf, 0); + } + return 0; +} + +static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) +{ + int ret, i; + struct kretprobe_trace_entry field; + char buf[MAX_ARGSTR_LEN + 1]; + struct trace_probe *tp = (struct trace_probe *)event_call->data; + + ret = trace_define_common_fields(event_call); + if (!ret) + return ret; + + DEFINE_FIELD(unsigned long, func, "func", 0); + DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0); + DEFINE_FIELD(int, nargs, "nargs", 1); + for (i = 0; i < tp->nr_args; i++) { + /* Set argN as a field */ + sprintf(buf, "arg%d", i); + DEFINE_FIELD(unsigned long, args[i], buf, 0); + /* Set argument string as an alias field */ + ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + if (ret < 0) + return ret; + DEFINE_FIELD(unsigned long, args[i], buf, 0); + } + return 0; +} + +static int __probe_event_show_format(struct trace_seq *s, + struct trace_probe *tp, const char *fmt, + const char *arg) +{ + int i, ret; + char buf[MAX_ARGSTR_LEN + 1]; + + /* Show aliases */ + for (i = 0; i < tp->nr_args; i++) { + ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + if (ret < 0) + return ret; + if (!trace_seq_printf(s, "\talias: %s;\toriginal: arg%d;\n", + buf, i)) + return 0; + } + /* Show format */ + if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt)) + return 0; + + for (i = 0; i < tp->nr_args; i++) + if (!trace_seq_puts(s, " 0x%lx")) + return 0; + + if (!trace_seq_printf(s, "\", %s", arg)) + return 0; + + for (i = 0; i < tp->nr_args; i++) + if (!trace_seq_printf(s, ", arg%d", i)) + return 0; + + return trace_seq_puts(s, "\n"); +} + +#undef SHOW_FIELD +#define SHOW_FIELD(type, item, name) \ + do { \ + ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ + "offset:%u;tsize:%u;\n", name, \ + (unsigned int)offsetof(typeof(field), item),\ + (unsigned int)sizeof(type)); \ + if (!ret) \ + return 0; \ + } while (0) + +static int kprobe_event_show_format(struct ftrace_event_call *call, + struct trace_seq *s) +{ + struct kprobe_trace_entry field __attribute__((unused)); + int ret, i; + char buf[8]; + struct trace_probe *tp = (struct trace_probe *)call->data; + + SHOW_FIELD(unsigned long, ip, "ip"); + SHOW_FIELD(int, nargs, "nargs"); + + /* Show fields */ + for (i = 0; i < tp->nr_args; i++) { + sprintf(buf, "arg%d", i); + SHOW_FIELD(unsigned long, args[i], buf); + } + trace_seq_puts(s, "\n"); + + return __probe_event_show_format(s, tp, "%lx:", "ip"); +} + +static int kretprobe_event_show_format(struct ftrace_event_call *call, + struct trace_seq *s) +{ + struct kretprobe_trace_entry field __attribute__((unused)); + int ret, i; + char buf[8]; + struct trace_probe *tp = (struct trace_probe *)call->data; + + SHOW_FIELD(unsigned long, func, "func"); + SHOW_FIELD(unsigned long, ret_ip, "ret_ip"); + SHOW_FIELD(int, nargs, "nargs"); + + /* Show fields */ + for (i = 0; i < tp->nr_args; i++) { + sprintf(buf, "arg%d", i); + SHOW_FIELD(unsigned long, args[i], buf); + } + trace_seq_puts(s, "\n"); + + return __probe_event_show_format(s, tp, "%lx <- %lx:", + "func, ret_ip"); +} + +static int register_probe_event(struct trace_probe *tp) +{ + struct ftrace_event_call *call = &tp->call; + int ret; + + /* Initialize ftrace_event_call */ + call->system = "kprobes"; + if (probe_is_return(tp)) { + call->event = &kretprobe_trace_event; + call->id = TRACE_KRETPROBE; + call->raw_init = probe_event_raw_init; + call->show_format = kretprobe_event_show_format; + call->define_fields = kretprobe_event_define_fields; + } else { + call->event = &kprobe_trace_event; + call->id = TRACE_KPROBE; + call->raw_init = probe_event_raw_init; + call->show_format = kprobe_event_show_format; + call->define_fields = kprobe_event_define_fields; + } + call->enabled = 1; + call->regfunc = probe_event_enable; + call->unregfunc = probe_event_disable; + call->data = tp; + ret = trace_add_event_call(call); + if (ret) + pr_info("Failed to register kprobe event: %s\n", call->name); + return ret; +} + +static void unregister_probe_event(struct trace_probe *tp) +{ + /* + * Prevent to unregister event itself because the event is shared + * among other probes. + */ + tp->call.event = NULL; + trace_remove_event_call(&tp->call); +} + +/* Make a debugfs interface for controling probe points */ +static __init int init_kprobe_trace(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + int ret; + + ret = register_ftrace_event(&kprobe_trace_event); + if (!ret) { + pr_warning("Could not register kprobe_trace_event type.\n"); + return 0; + } + ret = register_ftrace_event(&kretprobe_trace_event); + if (!ret) { + pr_warning("Could not register kretprobe_trace_event type.\n"); + return 0; + } + + d_tracer = tracing_init_dentry(); + if (!d_tracer) + return 0; + + entry = debugfs_create_file("kprobe_events", 0644, d_tracer, + NULL, &kprobe_events_ops); + + if (!entry) + pr_warning("Could not create debugfs " + "'kprobe_events' entry\n"); + return 0; +} +fs_initcall(init_kprobe_trace); + + +#ifdef CONFIG_FTRACE_STARTUP_TEST + +static int kprobe_trace_selftest_target(int a1, int a2, int a3, + int a4, int a5, int a6) +{ + return a1 + a2 + a3 + a4 + a5 + a6; +} + +static __init int kprobe_trace_self_tests_init(void) +{ + int ret; + int (*target)(int, int, int, int, int, int); + + target = kprobe_trace_selftest_target; + + pr_info("Testing kprobe tracing: "); + + ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " + "a1 a2 a3 a4 a5 a6"); + if (WARN_ON_ONCE(ret)) + pr_warning("error enabling function entry\n"); + + ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " + "ra rv"); + if (WARN_ON_ONCE(ret)) + pr_warning("error enabling function return\n"); + + ret = target(1, 2, 3, 4, 5, 6); + + cleanup_all_probes(); + + pr_cont("OK\n"); + return 0; +} + +late_initcall(kprobe_trace_self_tests_init); + +#endif -- cgit v0.10.2 From d8ec91850efaf6cee9234c80260fe03881242374 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 19 Aug 2009 21:13:57 +0200 Subject: tracing: Add kprobe-based event tracer documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the documentation to use the kprobe based event tracer. [fweisbec@gmail.com: Split tracer and its Documentation in two patchs] Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203510.31965.29123.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt new file mode 100644 index 0000000..efff6eb --- /dev/null +++ b/Documentation/trace/kprobetrace.txt @@ -0,0 +1,139 @@ + Kprobe-based Event Tracer + ========================= + + Documentation is written by Masami Hiramatsu + + +Overview +-------- +This tracer is similar to the events tracer which is based on Tracepoint +infrastructure. Instead of Tracepoint, this tracer is based on kprobes(kprobe +and kretprobe). It probes anywhere where kprobes can probe(this means, all +functions body except for __kprobes functions). + +Unlike the function tracer, this tracer can probe instructions inside of +kernel functions. It allows you to check which instruction has been executed. + +Unlike the Tracepoint based events tracer, this tracer can add and remove +probe points on the fly. + +Similar to the events tracer, this tracer doesn't need to be activated via +current_tracer, instead of that, just set probe points via +/sys/kernel/debug/tracing/kprobe_events. And you can set filters on each +probe events via /sys/kernel/debug/tracing/events/kprobes//filter. + + +Synopsis of kprobe_events +------------------------- + p[:EVENT] SYMBOL[+offs|-offs]|MEMADDR [FETCHARGS] : Set a probe + r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + + EVENT : Event name. + SYMBOL[+offs|-offs] : Symbol+offset where the probe is inserted. + MEMADDR : Address where the probe is inserted. + + FETCHARGS : Arguments. + %REG : Fetch register REG + sN : Fetch Nth entry of stack (N >= 0) + sa : Fetch stack address. + @ADDR : Fetch memory at ADDR (ADDR should be in kernel) + @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) + aN : Fetch function argument. (N >= 0)(*) + rv : Fetch return value.(**) + ra : Fetch return address.(**) + +|-offs(FETCHARG) : fetch memory at FETCHARG +|- offs address.(***) + + (*) aN may not correct on asmlinkaged functions and at the middle of + function body. + (**) only for return probe. + (***) this is useful for fetching a field of data structures. + + +Per-Probe Event Filtering +------------------------- + Per-probe event filtering feature allows you to set different filter on each +probe and gives you what arguments will be shown in trace buffer. If an event +name is specified right after 'p:' or 'r:' in kprobe_events, the tracer adds +an event under tracing/events/kprobes/, at the directory you can see +'id', 'enabled', 'format' and 'filter'. + +enabled: + You can enable/disable the probe by writing 1 or 0 on it. + +format: + It shows the format of this probe event. It also shows aliases of arguments + which you specified to kprobe_events. + +filter: + You can write filtering rules of this event. And you can use both of aliase + names and field names for describing filters. + + +Usage examples +-------------- +To add a probe as a new event, write a new definition to kprobe_events +as below. + + echo p:myprobe do_sys_open a0 a1 a2 a3 > /sys/kernel/debug/tracing/kprobe_events + + This sets a kprobe on the top of do_sys_open() function with recording +1st to 4th arguments as "myprobe" event. + + echo r:myretprobe do_sys_open rv ra >> /sys/kernel/debug/tracing/kprobe_events + + This sets a kretprobe on the return point of do_sys_open() function with +recording return value and return address as "myretprobe" event. + You can see the format of these events via +/sys/kernel/debug/tracing/events/kprobes//format. + + cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format +name: myprobe +ID: 23 +format: + field:unsigned short common_type; offset:0; size:2; + field:unsigned char common_flags; offset:2; size:1; + field:unsigned char common_preempt_count; offset:3; size:1; + field:int common_pid; offset:4; size:4; + field:int common_tgid; offset:8; size:4; + + field: unsigned long ip; offset:16;tsize:8; + field: int nargs; offset:24;tsize:4; + field: unsigned long arg0; offset:32;tsize:8; + field: unsigned long arg1; offset:40;tsize:8; + field: unsigned long arg2; offset:48;tsize:8; + field: unsigned long arg3; offset:56;tsize:8; + + alias: a0; original: arg0; + alias: a1; original: arg1; + alias: a2; original: arg2; + alias: a3; original: arg3; + +print fmt: "%lx: 0x%lx 0x%lx 0x%lx 0x%lx", ip, arg0, arg1, arg2, arg3 + + + You can see that the event has 4 arguments and alias expressions +corresponding to it. + + echo > /sys/kernel/debug/tracing/kprobe_events + + This clears all probe points. and you can see the traced information via +/sys/kernel/debug/tracing/trace. + + cat /sys/kernel/debug/tracing/trace +# tracer: nop +# +# TASK-PID CPU# TIMESTAMP FUNCTION +# | | | | | + <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: 0x3 0x7fffd1ec4440 0x8000 0x0 + <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: 0xfffffffffffffffe 0xffffffff81367a3a + <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: 0xffffff9c 0x40413c 0x8000 0x1b6 + <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a + <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: 0xffffff9c 0x4041c6 0x98800 0x10 + <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a + + + Each line shows when the kernel hits a probe, and <- SYMBOL means kernel +returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel +returns from do_sys_open to sys_open+0x1b). + + -- cgit v0.10.2 From a82378d8802717b9776a7d9b54422f65c414d6cc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:35:18 -0400 Subject: tracing: Kprobe-tracer supports more than 6 arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Support up to 128 arguments to fetch for each kprobes event. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203518.31965.96979.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index efff6eb..c9c09b4 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -32,7 +32,7 @@ Synopsis of kprobe_events SYMBOL[+offs|-offs] : Symbol+offset where the probe is inserted. MEMADDR : Address where the probe is inserted. - FETCHARGS : Arguments. + FETCHARGS : Arguments. Each probe can have up to 128 args. %REG : Fetch register REG sN : Fetch Nth entry of stack (N >= 0) sa : Fetch stack address. diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 0c4f00a..6d488ef 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -32,7 +32,7 @@ #include "trace.h" #include "trace_output.h" -#define TRACE_KPROBE_ARGS 6 +#define MAX_TRACE_ARGS 128 #define MAX_ARGSTR_LEN 63 /* currently, trace_kprobe only supports X86. */ @@ -184,11 +184,15 @@ struct trace_probe { struct kretprobe rp; }; const char *symbol; /* symbol name */ - unsigned int nr_args; - struct fetch_func args[TRACE_KPROBE_ARGS]; struct ftrace_event_call call; + unsigned int nr_args; + struct fetch_func args[]; }; +#define SIZEOF_TRACE_PROBE(n) \ + (offsetof(struct trace_probe, args) + \ + (sizeof(struct fetch_func) * (n))) + static int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_trace_func(struct kretprobe_instance *ri, struct pt_regs *regs); @@ -263,11 +267,11 @@ static DEFINE_MUTEX(probe_lock); static LIST_HEAD(probe_list); static struct trace_probe *alloc_trace_probe(const char *symbol, - const char *event) + const char *event, int nargs) { struct trace_probe *tp; - tp = kzalloc(sizeof(struct trace_probe), GFP_KERNEL); + tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL); if (!tp) return ERR_PTR(-ENOMEM); @@ -573,9 +577,10 @@ static int create_trace_probe(int argc, char **argv) if (offset && is_return) return -EINVAL; } + argc -= 2; argv += 2; /* setup a probe */ - tp = alloc_trace_probe(symbol, event); + tp = alloc_trace_probe(symbol, event, argc); if (IS_ERR(tp)) return PTR_ERR(tp); @@ -594,8 +599,8 @@ static int create_trace_probe(int argc, char **argv) kp->addr = addr; /* parse arguments */ - argc -= 2; argv += 2; ret = 0; - for (i = 0; i < argc && i < TRACE_KPROBE_ARGS; i++) { + ret = 0; + for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { if (strlen(argv[i]) > MAX_ARGSTR_LEN) { pr_info("Argument%d(%s) is too long.\n", i, argv[i]); ret = -ENOSPC; -- cgit v0.10.2 From 4263565d491145b57621a761714f2ca6f1293a45 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:35:26 -0400 Subject: tracing: Generate names for each kprobe event automatically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generate names for each kprobe event based on the probe point. (SYMBOL+offs or MEMADDR). Also remove generic k*probe event types because there is no user of those types. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203526.31965.56672.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index c9c09b4..5e59e85 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -28,7 +28,8 @@ Synopsis of kprobe_events p[:EVENT] SYMBOL[+offs|-offs]|MEMADDR [FETCHARGS] : Set a probe r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe - EVENT : Event name. + EVENT : Event name. If omitted, the event name is generated + based on SYMBOL+offs or MEMADDR. SYMBOL[+offs|-offs] : Symbol+offset where the probe is inserted. MEMADDR : Address where the probe is inserted. diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h index 186b598..e74f090 100644 --- a/kernel/trace/trace_event_types.h +++ b/kernel/trace/trace_event_types.h @@ -175,22 +175,4 @@ TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, TP_RAW_FMT("type:%u call_site:%lx ptr:%p") ); -TRACE_EVENT_FORMAT(kprobe, TRACE_KPROBE, kprobe_trace_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, ip, ip) - TRACE_FIELD(int, nargs, nargs) - TRACE_FIELD_ZERO(unsigned long, args) - ), - TP_RAW_FMT("%08lx: args:0x%lx ...") -); - -TRACE_EVENT_FORMAT(kretprobe, TRACE_KRETPROBE, kretprobe_trace_entry, ignore, - TRACE_STRUCT( - TRACE_FIELD(unsigned long, func, func) - TRACE_FIELD(unsigned long, ret_ip, ret_ip) - TRACE_FIELD(int, nargs, nargs) - TRACE_FIELD_ZERO(unsigned long, args) - ), - TP_RAW_FMT("%08lx <- %08lx: args:0x%lx ...") -); #undef TRACE_SYSTEM diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6d488ef..8aeb24c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -34,6 +34,7 @@ #define MAX_TRACE_ARGS 128 #define MAX_ARGSTR_LEN 63 +#define MAX_EVENT_NAME_LEN 64 /* currently, trace_kprobe only supports X86. */ @@ -280,11 +281,11 @@ static struct trace_probe *alloc_trace_probe(const char *symbol, if (!tp->symbol) goto error; } - if (event) { - tp->call.name = kstrdup(event, GFP_KERNEL); - if (!tp->call.name) - goto error; - } + if (!event) + goto error; + tp->call.name = kstrdup(event, GFP_KERNEL); + if (!tp->call.name) + goto error; INIT_LIST_HEAD(&tp->list); return tp; @@ -314,7 +315,7 @@ static struct trace_probe *find_probe_event(const char *event) struct trace_probe *tp; list_for_each_entry(tp, &probe_list, list) - if (tp->call.name && !strcmp(tp->call.name, event)) + if (!strcmp(tp->call.name, event)) return tp; return NULL; } @@ -330,8 +331,7 @@ static void __unregister_trace_probe(struct trace_probe *tp) /* Unregister a trace_probe and probe_event: call with locking probe_lock */ static void unregister_trace_probe(struct trace_probe *tp) { - if (tp->call.name) - unregister_probe_event(tp); + unregister_probe_event(tp); __unregister_trace_probe(tp); list_del(&tp->list); } @@ -360,18 +360,16 @@ static int register_trace_probe(struct trace_probe *tp) goto end; } /* register as an event */ - if (tp->call.name) { - old_tp = find_probe_event(tp->call.name); - if (old_tp) { - /* delete old event */ - unregister_trace_probe(old_tp); - free_trace_probe(old_tp); - } - ret = register_probe_event(tp); - if (ret) { - pr_warning("Faild to register probe event(%d)\n", ret); - __unregister_trace_probe(tp); - } + old_tp = find_probe_event(tp->call.name); + if (old_tp) { + /* delete old event */ + unregister_trace_probe(old_tp); + free_trace_probe(old_tp); + } + ret = register_probe_event(tp); + if (ret) { + pr_warning("Faild to register probe event(%d)\n", ret); + __unregister_trace_probe(tp); } list_add_tail(&tp->list, &probe_list); end: @@ -580,7 +578,18 @@ static int create_trace_probe(int argc, char **argv) argc -= 2; argv += 2; /* setup a probe */ - tp = alloc_trace_probe(symbol, event, argc); + if (!event) { + /* Make a new event name */ + char buf[MAX_EVENT_NAME_LEN]; + if (symbol) + snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld", + is_return ? 'r' : 'p', symbol, offset); + else + snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p", + is_return ? 'r' : 'p', addr); + tp = alloc_trace_probe(symbol, buf, argc); + } else + tp = alloc_trace_probe(symbol, event, argc); if (IS_ERR(tp)) return PTR_ERR(tp); @@ -661,8 +670,7 @@ static int probes_seq_show(struct seq_file *m, void *v) char buf[MAX_ARGSTR_LEN + 1]; seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); - if (tp->call.name) - seq_printf(m, ":%s", tp->call.name); + seq_printf(m, ":%s", tp->call.name); if (tp->symbol) seq_printf(m, " %s%+ld", probe_symbol(tp), probe_offset(tp)); @@ -780,10 +788,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) struct ring_buffer_event *event; int size, i, pc; unsigned long irq_flags; - struct ftrace_event_call *call = &event_kprobe; - - if (&tp->call.name) - call = &tp->call; + struct ftrace_event_call *call = &tp->call; local_save_flags(irq_flags); pc = preempt_count(); @@ -815,10 +820,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, struct ring_buffer_event *event; int size, i, pc; unsigned long irq_flags; - struct ftrace_event_call *call = &event_kretprobe; - - if (&tp->call.name) - call = &tp->call; + struct ftrace_event_call *call = &tp->call; local_save_flags(irq_flags); pc = preempt_count(); -- cgit v0.10.2 From ff50d99136c3315513ef3b2921e77f35ab04d081 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:35:34 -0400 Subject: tracing: Kprobe tracer assigns new event ids for each event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Assign new event ids for each kprobes event. This doesn't clear ring_buffer when unregistering each kprobe event. Thus, if you mind 'Unknown event' messages, clear the buffer manually after changing kprobe events. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203534.31965.49105.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 667f832..f5362a0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -38,8 +38,6 @@ enum trace_type { TRACE_KMEM_FREE, TRACE_POWER, TRACE_BLK, - TRACE_KPROBE, - TRACE_KRETPROBE, __TRACE_LAST_TYPE, }; @@ -343,10 +341,6 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ - IF_ASSIGN(var, ent, struct kprobe_trace_entry, \ - TRACE_KPROBE); \ - IF_ASSIGN(var, ent, struct kretprobe_trace_entry, \ - TRACE_KRETPROBE); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8aeb24c..9c067bf 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -186,6 +186,7 @@ struct trace_probe { }; const char *symbol; /* symbol name */ struct ftrace_event_call call; + struct trace_event event; unsigned int nr_args; struct fetch_func args[]; }; @@ -795,7 +796,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); - event = trace_current_buffer_lock_reserve(TRACE_KPROBE, size, + event = trace_current_buffer_lock_reserve(call->id, size, irq_flags, pc); if (!event) return 0; @@ -827,7 +828,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); - event = trace_current_buffer_lock_reserve(TRACE_KRETPROBE, size, + event = trace_current_buffer_lock_reserve(call->id, size, irq_flags, pc); if (!event) return 0; @@ -853,7 +854,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags) struct trace_seq *s = &iter->seq; int i; - trace_assign_type(field, iter->ent); + field = (struct kprobe_trace_entry *)iter->ent; if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) goto partial; @@ -880,7 +881,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags) struct trace_seq *s = &iter->seq; int i; - trace_assign_type(field, iter->ent); + field = (struct kretprobe_trace_entry *)iter->ent; if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) goto partial; @@ -906,16 +907,6 @@ partial: return TRACE_TYPE_PARTIAL_LINE; } -static struct trace_event kprobe_trace_event = { - .type = TRACE_KPROBE, - .trace = print_kprobe_event, -}; - -static struct trace_event kretprobe_trace_event = { - .type = TRACE_KRETPROBE, - .trace = print_kretprobe_event, -}; - static int probe_event_enable(struct ftrace_event_call *call) { struct trace_probe *tp = (struct trace_probe *)call->data; @@ -1104,35 +1095,35 @@ static int register_probe_event(struct trace_probe *tp) /* Initialize ftrace_event_call */ call->system = "kprobes"; if (probe_is_return(tp)) { - call->event = &kretprobe_trace_event; - call->id = TRACE_KRETPROBE; + tp->event.trace = print_kretprobe_event; call->raw_init = probe_event_raw_init; call->show_format = kretprobe_event_show_format; call->define_fields = kretprobe_event_define_fields; } else { - call->event = &kprobe_trace_event; - call->id = TRACE_KPROBE; + tp->event.trace = print_kprobe_event; call->raw_init = probe_event_raw_init; call->show_format = kprobe_event_show_format; call->define_fields = kprobe_event_define_fields; } + call->event = &tp->event; + call->id = register_ftrace_event(&tp->event); + if (!call->id) + return -ENODEV; call->enabled = 1; call->regfunc = probe_event_enable; call->unregfunc = probe_event_disable; call->data = tp; ret = trace_add_event_call(call); - if (ret) + if (ret) { pr_info("Failed to register kprobe event: %s\n", call->name); + unregister_ftrace_event(&tp->event); + } return ret; } static void unregister_probe_event(struct trace_probe *tp) { - /* - * Prevent to unregister event itself because the event is shared - * among other probes. - */ - tp->call.event = NULL; + /* tp->event is unregistered in trace_remove_event_call() */ trace_remove_event_call(&tp->call); } @@ -1141,18 +1132,6 @@ static __init int init_kprobe_trace(void) { struct dentry *d_tracer; struct dentry *entry; - int ret; - - ret = register_ftrace_event(&kprobe_trace_event); - if (!ret) { - pr_warning("Could not register kprobe_trace_event type.\n"); - return 0; - } - ret = register_ftrace_event(&kretprobe_trace_event); - if (!ret) { - pr_warning("Could not register kretprobe_trace_event type.\n"); - return 0; - } d_tracer = tracing_init_dentry(); if (!d_tracer) -- cgit v0.10.2 From cd7e7bd5e44718c7625ce1e1f0fda53d77cd3797 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 13 Aug 2009 16:35:42 -0400 Subject: tracing: Add kprobes event profiling interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add profiling interfaces for each kprobes event. This interface provides how many times each probe hit or missed. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: Jim Keniston Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090813203541.31965.8452.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 5e59e85..3de7517 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -70,6 +70,14 @@ filter: names and field names for describing filters. +Event Profiling +--------------- + You can check the total number of probe hits and probe miss-hits via +/sys/kernel/debug/tracing/kprobe_profile. + The first column is event name, the second is the number of probe hits, +the third is the number of probe miss-hits. + + Usage examples -------------- To add a probe as a new event, write a new definition to kprobe_events diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9c067bf..ce68197 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -184,6 +184,7 @@ struct trace_probe { struct kprobe kp; struct kretprobe rp; }; + unsigned long nhit; const char *symbol; /* symbol name */ struct ftrace_event_call call; struct trace_event event; @@ -781,6 +782,37 @@ static const struct file_operations kprobe_events_ops = { .write = probes_write, }; +/* Probes profiling interfaces */ +static int probes_profile_seq_show(struct seq_file *m, void *v) +{ + struct trace_probe *tp = v; + + seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, + probe_is_return(tp) ? tp->rp.kp.nmissed : tp->kp.nmissed); + + return 0; +} + +static const struct seq_operations profile_seq_op = { + .start = probes_seq_start, + .next = probes_seq_next, + .stop = probes_seq_stop, + .show = probes_profile_seq_show +}; + +static int profile_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &profile_seq_op); +} + +static const struct file_operations kprobe_profile_ops = { + .owner = THIS_MODULE, + .open = profile_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + /* Kprobe handler */ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) { @@ -791,6 +823,8 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) unsigned long irq_flags; struct ftrace_event_call *call = &tp->call; + tp->nhit++; + local_save_flags(irq_flags); pc = preempt_count(); @@ -1140,9 +1174,18 @@ static __init int init_kprobe_trace(void) entry = debugfs_create_file("kprobe_events", 0644, d_tracer, NULL, &kprobe_events_ops); + /* Event list interface */ if (!entry) pr_warning("Could not create debugfs " "'kprobe_events' entry\n"); + + /* Profile interface */ + entry = debugfs_create_file("kprobe_profile", 0444, d_tracer, + NULL, &kprobe_profile_ops); + + if (!entry) + pr_warning("Could not create debugfs " + "'kprobe_profile' entry\n"); return 0; } fs_initcall(init_kprobe_trace); -- cgit v0.10.2 From 8d7d14fb27818eb08ebedf9f4a6e286970fe9977 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Aug 2009 15:43:07 -0400 Subject: x86: Fix x86 instruction decoder selftest to check only .text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix x86 instruction decoder selftest to check only .text because other sections (e.g. .notes) will have random bytes which don't need to be checked. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090821194307.12478.76938.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 3dd626b..95e9cc4 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -1,6 +1,6 @@ PHONY += posttest quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) -- cgit v0.10.2 From 69d991f32152283cbc373136fa45bbb152b32048 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Aug 2009 15:43:16 -0400 Subject: x86: Check awk features before generating inat-tables.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check some awk mandatory features to generate inat-tables.c that old mawk doesn't support. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090821194316.12478.57394.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 93b62c9..19ba096 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -4,7 +4,25 @@ # # Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c +# Awk implementation sanity check +function check_awk_implement() { + if (!match("abc", "[[:lower:]]+")) + return "Your awk doesn't support charactor-class." + if (sprintf("%x", 0) != "0") + return "Your awk has a printf-format problem." + return "" +} + BEGIN { + # Implementation error checking + awkchecked = check_awk_implement() + if (awkchecked != "") { + print "Error: " awkchecked > "/dev/stderr" + print "Please try to use gawk." > "/dev/stderr" + exit 1 + } + + # Setup generating tables print "/* x86 opcode map generated from x86-opcode-map.txt */" print "/* Do not change this code. */" ggid = 1 @@ -293,6 +311,8 @@ function convert_operands(opnd, i,imm,mod) } END { + if (awkchecked != "") + exit 1 # print escape opcode map's array print "/* Escape opcode map array */" print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ -- cgit v0.10.2 From 38a47497d9e34632abbeb484603cedf10c4b05e4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Aug 2009 15:43:43 -0400 Subject: tracing/kprobes: Fix format typo in trace_kprobes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a format typo in kprobe-tracer. Currently, it shows 'tsize' in format; $ cat /debug/tracing/events/kprobes/event/format ... field: unsigned long ip; offset:16;tsize:8; field: int nargs; offset:24;tsize:4; ... This should be '\tsize'; $ cat /debug/tracing/events/kprobes/event/format ... field: unsigned long ip; offset:16; size:8; field: int nargs; offset:24; size:4; ... Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090821194343.12478.37618.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ce68197..1a9ca79 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1070,7 +1070,7 @@ static int __probe_event_show_format(struct trace_seq *s, #define SHOW_FIELD(type, item, name) \ do { \ ret = trace_seq_printf(s, "\tfield: " #type " %s;\t" \ - "offset:%u;tsize:%u;\n", name, \ + "offset:%u;\tsize:%u;\n", name, \ (unsigned int)offsetof(typeof(field), item),\ (unsigned int)sizeof(type)); \ if (!ret) \ -- cgit v0.10.2 From 30a7e073b590ebd1829a906164b0a637e77cc967 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 21 Aug 2009 15:43:51 -0400 Subject: tracing/kprobes: Change trace_arg to probe_arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change trace_arg_string() and parse_trace_arg() to probe_arg_string() and parse_probe_arg(), since those are kprobe-tracer local functions. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Avi Kivity Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: PrzemysÅ‚aw PaweÅ‚czyk Cc: Roland McGrath Cc: Sam Ravnborg Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi Cc: Vegard Nossum LKML-Reference: <20090821194351.12478.15247.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1a9ca79..f4ec3fc 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -220,7 +220,7 @@ static __kprobes void *probe_address(struct trace_probe *tp) return (probe_is_return(tp)) ? tp->rp.kp.addr : tp->kp.addr; } -static int trace_arg_string(char *buf, size_t n, struct fetch_func *ff) +static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) { int ret = -EINVAL; @@ -250,7 +250,7 @@ static int trace_arg_string(char *buf, size_t n, struct fetch_func *ff) if (ret >= n) goto end; l += ret; - ret = trace_arg_string(buf + l, n - l, &id->orig); + ret = probe_arg_string(buf + l, n - l, &id->orig); if (ret < 0) goto end; l += ret; @@ -408,7 +408,7 @@ static int split_symbol_offset(char *symbol, long *offset) #define PARAM_MAX_ARGS 16 #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) -static int parse_trace_arg(char *arg, struct fetch_func *ff, int is_return) +static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) { int ret = 0; unsigned long param; @@ -499,7 +499,7 @@ static int parse_trace_arg(char *arg, struct fetch_func *ff, int is_return) if (!id) return -ENOMEM; id->offset = offset; - ret = parse_trace_arg(arg, &id->orig, is_return); + ret = parse_probe_arg(arg, &id->orig, is_return); if (ret) kfree(id); else { @@ -617,7 +617,7 @@ static int create_trace_probe(int argc, char **argv) ret = -ENOSPC; goto error; } - ret = parse_trace_arg(argv[i], &tp->args[i], is_return); + ret = parse_probe_arg(argv[i], &tp->args[i], is_return); if (ret) goto error; } @@ -680,7 +680,7 @@ static int probes_seq_show(struct seq_file *m, void *v) seq_printf(m, " 0x%p", probe_address(tp)); for (i = 0; i < tp->nr_args; i++) { - ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); if (ret < 0) { pr_warning("Argument%d decoding error(%d).\n", i, ret); return ret; @@ -997,7 +997,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) sprintf(buf, "arg%d", i); DEFINE_FIELD(unsigned long, args[i], buf, 0); /* Set argument string as an alias field */ - ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); if (ret < 0) return ret; DEFINE_FIELD(unsigned long, args[i], buf, 0); @@ -1024,7 +1024,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) sprintf(buf, "arg%d", i); DEFINE_FIELD(unsigned long, args[i], buf, 0); /* Set argument string as an alias field */ - ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); if (ret < 0) return ret; DEFINE_FIELD(unsigned long, args[i], buf, 0); @@ -1041,7 +1041,7 @@ static int __probe_event_show_format(struct trace_seq *s, /* Show aliases */ for (i = 0; i < tp->nr_args; i++) { - ret = trace_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); if (ret < 0) return ret; if (!trace_seq_printf(s, "\talias: %s;\toriginal: arg%d;\n", -- cgit v0.10.2 From 24851d2447830e6cba4c4b641cb73e713f312373 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Aug 2009 23:38:30 +0200 Subject: tracing/kprobes: Dump the culprit kprobe in case of kprobe recursion Kprobes can enter into a probing recursion, ie: a kprobe that does an endless loop because one of its core mechanism function used during probing is also probed itself. This patch helps pinpointing the kprobe that raised such recursion by dumping it and raising a BUG instead of a warning (we also disarm the kprobe to try avoiding recursion in BUG itself). Having a BUG instead of a warning stops the stacktrace in the right place and doesn't pollute the logs with hundreds of traces that eventually end up in a stack overflow. Signed-off-by: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 16ae961..ecee3d2 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -490,9 +490,13 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, /* A probe has been hit in the codepath leading up * to, or just after, single-stepping of a probed * instruction. This entire codepath should strictly - * reside in .kprobes.text section. Raise a warning - * to highlight this peculiar case. + * reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless + * reentering loop and eventually a stack overflow. */ + arch_disarm_kprobe(p); + dump_kprobe(p); + BUG(); } default: /* impossible cases */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index bcd9c07..87eb79c 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -296,6 +296,8 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); int disable_kprobe(struct kprobe *kp); int enable_kprobe(struct kprobe *kp); +void dump_kprobe(struct kprobe *kp); + #else /* !CONFIG_KPROBES: */ static inline int kprobes_built_in(void) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ef177d6..f72e96c 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1141,6 +1141,13 @@ static void __kprobes kill_kprobe(struct kprobe *p) arch_remove_kprobe(p); } +void __kprobes dump_kprobe(struct kprobe *kp) +{ + printk(KERN_WARNING "Dumping kprobe:\n"); + printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", + kp->symbol_name, kp->addr, kp->offset); +} + /* Module notifier call back, checking kprobes on the module */ static int __kprobes kprobes_module_callback(struct notifier_block *nb, unsigned long val, void *data) -- cgit v0.10.2 From aeaeae1187d7520f1c5559623f0a149da6a1c96e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 27 Aug 2009 05:09:51 +0200 Subject: tracing: Restore the const qualifier for field names and types definition Restore the const qualifier in field's name and type parameters of trace_define_field that was lost while solving a conflict. Fields names and types are defined as builtin constant strings in static TRACE_EVENTs. But kprobes allocates these dynamically. That said, we still want to always pass these strings as const char * in trace_define_fields() to avoid any further accidental writes on the pointed strings. Reported-by: Li Zefan Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 1ab3089..73edf5a 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -148,9 +148,9 @@ enum { }; extern int trace_define_common_fields(struct ftrace_event_call *call); -extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed, - int filter_type); +extern int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, + int is_signed, int filter_type); extern int trace_add_event_call(struct ftrace_event_call *call); extern void trace_remove_event_call(struct ftrace_event_call *call); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 8079bb5..197cdaa 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); -int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed, +int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed, int filter_type) { struct ftrace_event_field *field; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5931933..a928dd0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -193,8 +193,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) return ret; for (i = 0; i < meta->nb_args; i++) { - ret = trace_define_field(call, (char *)meta->types[i], - (char *)meta->args[i], offset, + ret = trace_define_field(call, meta->types[i], + meta->args[i], offset, sizeof(unsigned long), 0, FILTER_OTHER); offset += sizeof(unsigned long); -- cgit v0.10.2 From f8468f3695209735c1595342f6bd95f7bdab66e1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 27 Aug 2009 05:23:29 +0200 Subject: tracing: Remove unneeded pointer casts Cleaup uneeded casts from void * to char * in syscalls tracing file. Reported-by: Li Zefan Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a928dd0..e7c676e 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -324,7 +324,7 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) int num; char *name; - name = (char *)call->data; + name = call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return -ENOSYS; @@ -347,7 +347,7 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) int num; char *name; - name = (char *)call->data; + name = call->data; num = syscall_name_to_nr(name); if (num < 0 || num >= FTRACE_SYSCALL_MAX) return; -- cgit v0.10.2 From e9afe9e1b3fdbd56cca53959a2519e70db9c8095 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:22:58 -0400 Subject: kprobes/x86: Call BUG() when reentering probe into KPROBES_HIT_SS Call BUG() when a probe have been hit on the way of kprobe processing path, because that kind of probes are currently unrecoverable (recovering it will cause an infinite loop and stack overflow). The original code seems to assume that it's caused by an int3 which another subsystem inserted on out-of-line singlestep buffer if the hitting probe is same as current probe. However, in that case, int3-hitting-address is on the out-of-line buffer and should be different from first (current) int3 address. Thus, I decided to remove the code. I also removes arch_disarm_kprobe() because it will involve other stuffs in text_poke(). Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172258.8246.61889.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index ecee3d2..e0fb615 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -482,22 +482,16 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, kcb->kprobe_status = KPROBE_REENTER; break; case KPROBE_HIT_SS: - if (p == kprobe_running()) { - regs->flags &= ~X86_EFLAGS_TF; - regs->flags |= kcb->kprobe_saved_flags; - return 0; - } else { - /* A probe has been hit in the codepath leading up - * to, or just after, single-stepping of a probed - * instruction. This entire codepath should strictly - * reside in .kprobes.text section. - * Raise a BUG or we'll continue in an endless - * reentering loop and eventually a stack overflow. - */ - arch_disarm_kprobe(p); - dump_kprobe(p); - BUG(); - } + /* A probe has been hit in the codepath leading up to, or just + * after, single-stepping of a probed instruction. This entire + * codepath should strictly reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless reentering loop + * and eventually a stack overflow. + */ + printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", + p->addr); + dump_kprobe(p); + BUG(); default: /* impossible cases */ WARN_ON(1); -- cgit v0.10.2 From f5ad31158d60946b9fd18c8a79c283a6bc432430 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:23:04 -0400 Subject: kprobes/x86-64: Allow to reenter probe on post_handler Allow to reenter probe on the post_handler of another probe on x86-64, because x86-64 already allows reentering int3. In that case, reentered probe just increases kp.nmissed and returns. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172304.8246.4822.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e0fb615..c5f1f11 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -463,17 +463,6 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, { switch (kcb->kprobe_status) { case KPROBE_HIT_SSDONE: -#ifdef CONFIG_X86_64 - /* TODO: Provide re-entrancy from post_kprobes_handler() and - * avoid exception stack corruption while single-stepping on - * the instruction of the new probe. - */ - arch_disarm_kprobe(p); - regs->ip = (unsigned long)p->addr; - reset_current_kprobe(); - preempt_enable_no_resched(); - break; -#endif case KPROBE_HIT_ACTIVE: save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); -- cgit v0.10.2 From 62c9295f9dd250ea1bb2c8078642a275a9ce82f8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:23:11 -0400 Subject: kprobes/x86: Fix to add __kprobes to in-kernel fault handing functions Add __kprobes to the functions which handle in-kernel fixable page faults. Since kprobes can cause those in-kernel page faults by accessing kprobe data structures, probing those fault functions will cause fault-int3-loop (do_page_fault has already been marked as __kprobes). Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172311.8246.92725.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index bfae139..c322e59 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -38,7 +38,8 @@ enum x86_pf_error_code { * Returns 0 if mmiotrace is disabled, or if the fault is not * handled by mmiotrace: */ -static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) +static inline int __kprobes +kmmio_fault(struct pt_regs *regs, unsigned long addr) { if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) @@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) return 0; } -static inline int notify_page_fault(struct pt_regs *regs) +static inline int __kprobes notify_page_fault(struct pt_regs *regs) { int ret = 0; @@ -239,7 +240,7 @@ void vmalloc_sync_all(void) * * Handle a fault on the vmalloc or module mapping area */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { unsigned long pgd_paddr; pmd_t *pmd_k; @@ -361,7 +362,7 @@ void vmalloc_sync_all(void) * * This assumes no large pages in there. */ -static noinline int vmalloc_fault(unsigned long address) +static noinline __kprobes int vmalloc_fault(unsigned long address) { pgd_t *pgd, *pgd_ref; pud_t *pud, *pud_ref; @@ -858,7 +859,7 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static noinline int +static noinline __kprobes int spurious_fault(unsigned long error_code, unsigned long address) { pgd_t *pgd; -- cgit v0.10.2 From 8f270083587a4cb70fa14f0e2fd698eb08a4dd07 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:23:18 -0400 Subject: kprobes: Fix to add __kprobes to notify_die Add __kprobes to notify_die() because do_int3() calls notify_die() instead of atomic_notify_call_chain() which is already marked as __kprobes. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172318.8246.53702.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/kernel/notifier.c b/kernel/notifier.c index 61d5aa5..acd24e7 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -558,7 +558,7 @@ EXPORT_SYMBOL(unregister_reboot_notifier); static ATOMIC_NOTIFIER_HEAD(die_chain); -int notrace notify_die(enum die_val val, const char *str, +int notrace __kprobes notify_die(enum die_val val, const char *str, struct pt_regs *regs, long err, int trap, int sig) { struct die_args args = { -- cgit v0.10.2 From 8222d718b3ad3ae49c48f69ae4b6a1128c9a92cf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:23:25 -0400 Subject: kprobes/x86-64: Fix to move common_interrupt to .kprobes.text Since nmi, debug and int3 returns to irq_return inside common_interrupt, probing this function will cause int3-loop, so it should be marked as __kprobes. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172325.8246.40000.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c251be7..36e2ef5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -809,6 +809,10 @@ END(interrupt) call \func .endm +/* + * Interrupt entry/exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" /* * The interrupt stubs push (~vector+0x80) onto the stack and * then jump to common_interrupt. @@ -947,6 +951,10 @@ ENTRY(retint_kernel) CFI_ENDPROC END(common_interrupt) +/* + * End of kprobes section + */ + .popsection /* * APIC interrupts. -- cgit v0.10.2 From 65e234ec2c4a0659ca22531dc1372a185f088517 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 27 Aug 2009 13:23:32 -0400 Subject: kprobes: Prohibit to probe native_get_debugreg Since do_debug() calls get_debugreg(), native_get_debugreg() will be called from singlestepping. This can cause an int3 infinite loop. We can't put it in the .text.kprobes section because it is inlined, then we blacklist its name. Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: Ingo Molnar LKML-Reference: <20090827172332.8246.34194.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f72e96c..3267d90 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -90,6 +90,7 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) */ static struct kprobe_blackpoint kprobe_blacklist[] = { {"preempt_schedule",}, + {"native_get_debugreg",}, {NULL} /* Terminator */ }; -- cgit v0.10.2 From 50a482fbd96943516b7a2783900e8fe61a6425e7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Aug 2009 18:13:19 -0400 Subject: x86: Allow x86-32 instruction decoder selftest on x86-64 Pass $(CONFIG_64BIT) to the x86 insn decoder selftest in case we are decoding 32bit code on x86-64, which will happen when building kernel with ARCH=i386 on x86-64. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ingo Molnar LKML-Reference: <20090828221319.8778.88508.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 95e9cc4..1bd006c 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -1,6 +1,6 @@ PHONY += posttest quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(CONFIG_64BIT) posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index 1e81adb..a3273f4 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -45,7 +45,7 @@ const char *prog; static void usage(void) { fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" - " ./test_get_len\n"); + " %s [y|n](64bit flag)\n", prog); exit(1); } @@ -63,11 +63,15 @@ int main(int argc, char **argv) unsigned char insn_buf[16]; struct insn insn; int insns = 0; + int x86_64 = 0; prog = argv[0]; - if (argc > 1) + if (argc > 2) usage(); + if (argc == 2 && argv[1][0] == 'y') + x86_64 = 1; + while (fgets(line, BUFSIZE, stdin)) { char copy[BUFSIZE], *s, *tab1, *tab2; int nb = 0; @@ -93,11 +97,7 @@ int main(int argc, char **argv) break; } /* Decode an instruction */ -#ifdef __x86_64__ - insn_init(&insn, insn_buf, 1); -#else - insn_init(&insn, insn_buf, 0); -#endif + insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); if (insn.length != nb) { fprintf(stderr, "Error: %s", line); -- cgit v0.10.2 From 70069577323e6f72b845166724f34b9858134437 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Aug 2009 18:13:26 -0400 Subject: x86: Remove unused config macros from instruction decoder selftest Remove dummy definitions of CONFIG_X86_64 and CONFIG_X86_32 because those macros are not used in the instruction decoder anymore. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ingo Molnar LKML-Reference: <20090828221326.8778.70723.stgit@localhost.localdomain> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index a3273f4..376d338 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -21,11 +21,6 @@ #include #include -#ifdef __x86_64__ -#define CONFIG_X86_64 -#else -#define CONFIG_X86_32 -#endif #define unlikely(cond) (cond) #include -- cgit v0.10.2 From d6a65dffb30d8636b1e5d4c201564ef401a246cf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Sep 2009 03:23:20 +0200 Subject: tracing: Fix ring-buffer and ksym tracer merge interaction The compiler warns us about: kernel/trace/trace_ksym.c: In function ksym_hbp_handler: kernel/trace/trace_ksym.c:92: attention : passing argument 1 of trace_buffer_lock_reserve from incompatible pointer type kernel/trace/trace_ksym.c:106: attention : passing argument 1 of trace_buffer_unlock_commit from incompatible pointer type Commit "e77405ad" (tracing: pass around ring buffer instead of tracer) has changed the central tracing APIs. And this change has updated every callsites of these APIs except those that aren't in tracing/core, such as the ksym tracer. Cc: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 2fde875..6d5609c 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -78,17 +78,18 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) { struct ring_buffer_event *event; - struct trace_array *tr; struct ksym_trace_entry *entry; + struct ring_buffer *buffer; int pc; if (!ksym_tracing_enabled) return; - tr = ksym_trace_array; + buffer = ksym_trace_array->buffer; + pc = preempt_count(); - event = trace_buffer_lock_reserve(tr, TRACE_KSYM, + event = trace_buffer_lock_reserve(buffer, TRACE_KSYM, sizeof(*entry), 0, pc); if (!event) return; @@ -103,7 +104,7 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) ksym_collect_stats(hbp->info.address); #endif /* CONFIG_PROFILE_KSYM_TRACER */ - trace_buffer_unlock_commit(tr, event, 0, pc); + trace_buffer_unlock_commit(buffer, event, 0, pc); } /* Valid access types are represented as -- cgit v0.10.2 From f12b4f546b4e327d5620a544a2bddab68de66027 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 8 Sep 2009 12:32:46 -0400 Subject: x86: Add MMX support for instruction decoder Add MMX/SSE instructions to x86 opcode maps, since some of those instructions are used in the kernel. This also fixes failures in the x86 instruction decoder seftest. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: H. Peter Anvin Cc: Sam Ravnborg Cc: Frederic Weisbecker Cc: Ingo Molnar LKML-Reference: <20090908163246.23516.78835.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 083dd59..59e20d5 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -310,14 +310,14 @@ Referrer: 2-byte escape 0e: 0f: # 0x0f 0x10-0x1f -10: -11: -12: -13: -14: -15: -16: -17: +10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) +11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) +12: movlps Vq,Mq | movlpd Vq,Mq (66) | movhlps Vq,Uq | movddup Vq,Wq (F2) | movsldup Vq,Wq (F3) +13: mpvlps Mq,Vq | movlpd Mq,Vq (66) +14: unpcklps Vps,Wq | unpcklpd Vpd,Wq (66) +15: unpckhps Vps,Wq | unpckhpd Vpd,Wq (66) +16: movhps Vq,Mq | movhpd Vq,Mq (66) | movlsps Vq,Uq | movshdup Vq,Wq (F3) +17: movhps Mq,Vq | movhpd Mq,Vq (66) 18: Grp16 (1A) 19: 1a: @@ -337,12 +337,12 @@ Referrer: 2-byte escape 27: 28: movaps Vps,Wps | movapd Vpd,Wpd (66) 29: movaps Wps,Vps | movapd Wpd,Vpd (66) -2a: -2b: -2c: -2d: -2e: -2f: +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2) +2b: movntps Mps,Vps | movntpd Mpd,Vpd (66) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2) +2e: ucomiss Vss,Wss | ucomisd Vsd,Wsd (66) +2f: comiss Vss,Wss | comisd Vsd,Wsd (66) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -378,56 +378,56 @@ Referrer: 2-byte escape 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: -51: -52: -53: -54: -55: -56: -57: -58: -59: -5a: -5b: -5c: -5d: -5e: -5f: +50: movmskps Gd/q,Ups | movmskpd Gd/q,Upd (66) +51: sqrtps Vps,Wps | sqrtss Vss,Wss (F3) | sqrtpd Vpd,Wpd (66) | sqrtsd Vsd,Wsd (F2) +52: rsqrtps Vps,Wps | rsqrtss Vss,Wss (F3) +53: rcpps Vps,Wps | rcpss Vss,Wss (F3) +54: andps Vps,Wps | andpd Vpd,Wpd (66) +55: andnps Vps,Wps | andnpd Vpd,Wpd (66) +56: orps Vps,Wps | orpd Vpd,Wpd (66) +57: xorps Vps,Wps | xorpd Vpd,Wpd (66) +58: addps Vps,Wps | addss Vss,Wss (F3) | addpd Vpd,Wpd (66) | addsd Vsd,Wsd (F2) +59: mulps Vps,Wps | mulss Vss,Wss (F3) | mulpd Vpd,Wpd (66) | mulsd Vsd,Wsd (F2) +5a: cvtps2pd Vpd,Wps | cvtss2sd Vsd,Wss (F3) | cvtpd2ps Vps,Wpd (66) | cvtsd2ss Vsd,Wsd (F2) +5b: cvtdq2ps Vps,Wdq | cvtps2dq Vdq,Wps (66) | cvttps2dq Vdq,Wps (F3) +5c: subps Vps,Wps | subss Vss,Wss (F3) | subpd Vpd,Wpd (66) | subsd Vsd,Wsd (F2) +5d: minps Vps,Wps | minss Vss,Wss (F3) | minpd Vpd,Wpd (66) | minsd Vsd,Wsd (F2) +5e: divps Vps,Wps | divss Vss,Wss (F3) | divpd Vpd,Wpd (66) | divsd Vsd,Wsd (F2) +5f: maxps Vps,Wps | maxss Vss,Wss (F3) | maxpd Vpd,Wpd (66) | maxsd Vsd,Wsd (F2) # 0x0f 0x60-0x6f -60: -61: -62: -63: -64: -65: -66: -67: -68: -69: -6a: -6b: -6c: -6d: -6e: -6f: +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) +65: pcmpgtw Pq,Qq | pcmpgtw(66) Vdq,Wdq +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) +67: packuswb Pq,Qq | packuswb(66) Vdq,Wdq +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66) +6c: punpcklqdq Vdq,Wdq (66) +6d: punpckhqdq Vdq,Wdq (66) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66) | movdqu Vdq,Wdq (F3) # 0x0f 0x70-0x7f -70: +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66) | pshufhw Vdq,Wdq,Ib (F3) | pshuflw VdqWdq,Ib (F2) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: -75: -76: -77: +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66) +77: emms 78: VMREAD Ed/q,Gd/q 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: -7d: -7e: -7f: +7c: haddps(F2) Vps,Wps | haddpd(66) Vpd,Wpd +7d: hsubps(F2) Vps,Wps | hsubpd(66) Vpd,Wpd +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) @@ -499,11 +499,11 @@ bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: +c2: cmpps Vps,Wps,Ib | cmpss Vss,Wss,Ib (F3) | cmppd Vpd,Wpd,Ib (66) | cmpsd Vsd,Wsd,Ib (F2) c3: movnti Md/q,Gd/q -c4: -c5: -c6: +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66) +c6: shufps Vps,Wps,Ib | shufpd Vpd,Wpd,Ib (66) c7: Grp9 (1A) c8: BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -514,60 +514,131 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: -d1: -d2: -d3: -d4: -d5: -d6: -d7: -d8: -d9: -da: -db: -dc: -dd: -de: -df: +d0: addsubps Vps,Wps (F2) | addsubpd Vpd,Wpd (66) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66) +d6: movq Wq,Vq (66) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66) +da: pminub Pq,Qq | pminub Vdq,Wdq (66) +db: pand Pq,Qq | pand Vdq,Wdq (66) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66) +df: pandn Pq,Qq | pandn Vdq,Wdq (66) # 0x0f 0xe0-0xef -e0: -e1: -e2: -e3: -e4: -e5: -e6: -e7: -e8: -e9: -ea: -eb: -ec: -ed: -ee: -ef: +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66) +e6: cvtpd2dq Vdq,Wpd (F2) | cvttpd2dq Vdq,Wpd (66) | cvtdq2pd Vpd,Wdq (F3) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66) +eb: por Pq,Qq | por Vdq,Wdq (66) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66) # 0x0f 0xf0-0xff -f0: -f1: -f2: -f3: -f4: -f5: -f6: -f7: -f8: -f9: -fa: -fb: -fc: -fd: -fe: +f0: lddqu Vdq,Mdq (F2) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66) ff: EndTable Table: 3-byte opcode 1 Referrer: 3-byte escape 1 +# 0x0f 0x38 0x00-0x0f +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) +04: pmaddubsw Pq,Qq | pmaddubsw (66)Vdq,Wdq +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) +08: psignb Pq,Qq | psignb Vdq,Wdq (66) +09: psignw Pq,Qq | psignw Vdq,Wdq (66) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66) +0c: +0d: +0e: +0f: +# 0x0f 0x38 0x10-0x1f +10: pblendvb Vdq,Wdq (66) +11: +12: +13: +14: blendvps Vdq,Wdq (66) +15: blendvpd Vdq,Wdq (66) +16: +17: ptest Vdq,Wdq (66) +18: +19: +1a: +1b: +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66) +1f: +# 0x0f 0x38 0x20-0x2f +20: pmovsxbw Vdq,Udq/Mq (66) +21: pmovsxbd Vdq,Udq/Md (66) +22: pmovsxbq Vdq,Udq/Mw (66) +23: pmovsxwd Vdq,Udq/Mq (66) +24: pmovsxwq Vdq,Udq/Md (66) +25: pmovsxdq Vdq,Udq/Mq (66) +26: +27: +28: pmuldq Vdq,Wdq (66) +29: pcmpeqq Vdq,Wdq (66) +2a: movntdqa Vdq,Mdq (66) +2b: packusdw Vdq,Wdq (66) +2c: +2d: +2e: +2f: +# 0x0f 0x38 0x30-0x3f +30: pmovzxbw Vdq,Udq/Mq (66) +31: pmovzxbd Vdq,Udq/Md (66) +32: pmovzxbq Vdq,Udq/Mw (66) +33: pmovzxwd Vdq,Udq/Mq (66) +34: pmovzxwq Vdq,Udq/Md (66) +35: pmovzxdq Vdq,Udq/Mq (66) +36: +37: pcmpgtq Vdq,Wdq (66) +38: pminsb Vdq,Wdq (66) +39: pminsd Vdq,Wdq (66) +3a: pminuw Vdq,Wdq (66) +3b: pminud Vdq,Wdq (66) +3c: pmaxsb Vdq,Wdq (66) +3d: pmaxsd Vdq,Wdq (66) +3e: pmaxuw Vdq,Wdq (66) +3f: pmaxud Vdq,Wdq (66) +# 0x0f 0x38 0x4f-0xff +40: pmulld Vdq,Wdq (66) +41: phminposuw Vdq,Wdq (66) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) @@ -576,7 +647,29 @@ EndTable Table: 3-byte opcode 2 Referrer: 3-byte escape 2 -# all opcode is for SSE +# 0x0f 0x3a 0x00-0xff +08: roundps Vdq,Wdq,Ib (66) +09: roundpd Vdq,Wdq,Ib (66) +0a: roundss Vss,Wss,Ib (66) +0b: roundsd Vsd,Wsd,Ib (66) +0c: blendps Vdq,Wdq,Ib (66) +0d: blendpd Vdq,Wdq,Ib (66) +0e: pblendw Vdq,Wdq,Ib (66) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66) +14: pextrb Rd/Mb,Vdq,Ib (66) +15: pextrw Rd/Mw,Vdq,Ib (66) +16: pextrd/pextrq Ed/q,Vdq,Ib (66) +17: extractps Ed,Vdq,Ib (66) +20: pinsrb Vdq,Rd/q/Mb,Ib (66) +21: insertps Vdq,Udq/Md,Ib (66) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66) +40: dpps Vdq,Wdq,Ib (66) +41: dppd Vdq,Wdq,Ib (66) +42: mpsadbw Vdq,Wdq,Ib (66) +60: pcmpestrm Vdq,Wdq,Ib (66) +61: pcmpestri Vdq,Wdq,Ib (66) +62: pcmpistrm Vdq,Wdq,Ib (66) +63: pcmpistri Vdq,Wdq,Ib (66) EndTable GrpTable: Grp1 -- cgit v0.10.2 From a00e817f42663941ea0aa5f85a9d1c4f8b212839 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 8 Sep 2009 12:47:55 -0400 Subject: kprobes/x86-32: Move irq-exit functions to kprobes section Move irq-exit functions to .kprobes.text section to protect against kprobes recursion. When I ran kprobe stress test on x86-32, I found below symbols cause unrecoverable recursive probing: ret_from_exception ret_from_intr check_userspace restore_all restore_all_notrace restore_nocheck irq_return And also, I found some interrupt/exception entry points that cause similar problems. This patch moves those symbols (including their container functions) to .kprobes.text section to prevent any kprobes probing. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Ingo Molnar LKML-Reference: <20090908164755.24050.81182.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d..beb30da 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -334,6 +334,10 @@ ENTRY(ret_from_fork) END(ret_from_fork) /* + * Interrupt exit functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" +/* * Return to user mode is not as complex as all this looks, * but we want the default path for a system call return to * go as quickly as possible which is why some of this is @@ -383,6 +387,10 @@ need_resched: END(resume_kernel) #endif CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* SYSENTER_RETURN points to after the "sysenter" instruction in the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ @@ -513,6 +521,10 @@ sysexit_audit: PTGS_TO_GS_EX ENDPROC(ia32_sysenter_target) +/* + * syscall stub including irq exit should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" # system call handler stub ENTRY(system_call) RING0_INT_FRAME # can't unwind into user space anyway @@ -705,6 +717,10 @@ syscall_badsys: jmp resume_userspace END(syscall_badsys) CFI_ENDPROC +/* + * End of kprobes section + */ + .popsection /* * System calls that need a pt_regs pointer. @@ -814,6 +830,10 @@ common_interrupt: ENDPROC(common_interrupt) CFI_ENDPROC +/* + * Irq entries should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" #define BUILD_INTERRUPT3(name, nr, fn) \ ENTRY(name) \ RING0_INT_FRAME; \ @@ -980,6 +1000,10 @@ ENTRY(spurious_interrupt_bug) jmp error_code CFI_ENDPROC END(spurious_interrupt_bug) +/* + * End of kprobes section + */ + .popsection ENTRY(kernel_thread_helper) pushl $0 # fake return address for unwinder diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3267d90..00d01b0 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -91,6 +91,8 @@ static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) static struct kprobe_blackpoint kprobe_blacklist[] = { {"preempt_schedule",}, {"native_get_debugreg",}, + {"irq_entries_start",}, + {"common_interrupt",}, {NULL} /* Terminator */ }; -- cgit v0.10.2 From ad5cafcdb09c57008c990edd309c0a563b09f238 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2009 19:53:06 -0400 Subject: x86/ptrace: Fix regs_get_argument_nth() to add correct offset Fix regs_get_argument_nth() to add correct offset bytes. Because offset_of() returns offset in byte, the offset should be added to char * instead of unsigned long *. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090910235306.22412.31613.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a33a17d..caffb68 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -150,7 +150,7 @@ static const int arg_offs_table[] = { unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) { if (n < ARRAY_SIZE(arg_offs_table)) - return *((unsigned long *)regs + arg_offs_table[n]); + return *(unsigned long *)((char *)regs + arg_offs_table[n]); else { /* * The typical case: arg n is on the stack. -- cgit v0.10.2 From 2fba0c8867af47f6455490e7b59e512dd180c027 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2009 19:53:14 -0400 Subject: tracing/kprobes: Fix probe offset to be unsigned Prohibit user to specify negative offset from symbols. Since kprobe.offset is unsigned int, the offset must be always positive value. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090910235314.22412.64631.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 3de7517..db55318 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -25,15 +25,15 @@ probe events via /sys/kernel/debug/tracing/events/kprobes//filter. Synopsis of kprobe_events ------------------------- - p[:EVENT] SYMBOL[+offs|-offs]|MEMADDR [FETCHARGS] : Set a probe - r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + p[:EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe + r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe - EVENT : Event name. If omitted, the event name is generated - based on SYMBOL+offs or MEMADDR. - SYMBOL[+offs|-offs] : Symbol+offset where the probe is inserted. - MEMADDR : Address where the probe is inserted. + EVENT : Event name. If omitted, the event name is generated + based on SYMBOL+offs or MEMADDR. + SYMBOL[+offs] : Symbol+offset where the probe is inserted. + MEMADDR : Address where the probe is inserted. - FETCHARGS : Arguments. Each probe can have up to 128 args. + FETCHARGS : Arguments. Each probe can have up to 128 args. %REG : Fetch register REG sN : Fetch Nth entry of stack (N >= 0) sa : Fetch stack address. diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 19a6de6..c24b7e9 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -210,7 +210,7 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp) return tp->symbol ? tp->symbol : "unknown"; } -static __kprobes long probe_offset(struct trace_probe *tp) +static __kprobes unsigned int probe_offset(struct trace_probe *tp) { return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset; } @@ -380,7 +380,7 @@ end: } /* Split symbol and offset. */ -static int split_symbol_offset(char *symbol, long *offset) +static int split_symbol_offset(char *symbol, unsigned long *offset) { char *tmp; int ret; @@ -389,16 +389,11 @@ static int split_symbol_offset(char *symbol, long *offset) return -EINVAL; tmp = strchr(symbol, '+'); - if (!tmp) - tmp = strchr(symbol, '-'); - if (tmp) { /* skip sign because strict_strtol doesn't accept '+' */ - ret = strict_strtol(tmp + 1, 0, offset); + ret = strict_strtoul(tmp + 1, 0, offset); if (ret) return ret; - if (*tmp == '-') - *offset = -(*offset); *tmp = '\0'; } else *offset = 0; @@ -520,7 +515,7 @@ static int create_trace_probe(int argc, char **argv) { /* * Argument syntax: - * - Add kprobe: p[:EVENT] SYMBOL[+OFFS|-OFFS]|ADDRESS [FETCHARGS] + * - Add kprobe: p[:EVENT] SYMBOL[+OFFS]|ADDRESS [FETCHARGS] * - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS] * Fetch args: * aN : fetch Nth of function argument. (N:0-) @@ -539,7 +534,7 @@ static int create_trace_probe(int argc, char **argv) int i, ret = 0; int is_return = 0; char *symbol = NULL, *event = NULL; - long offset = 0; + unsigned long offset = 0; void *addr = NULL; if (argc < 2) @@ -605,7 +600,7 @@ static int create_trace_probe(int argc, char **argv) if (tp->symbol) { kp->symbol_name = tp->symbol; - kp->offset = offset; + kp->offset = (unsigned int)offset; } else kp->addr = addr; @@ -675,7 +670,7 @@ static int probes_seq_show(struct seq_file *m, void *v) seq_printf(m, ":%s", tp->call.name); if (tp->symbol) - seq_printf(m, " %s%+ld", probe_symbol(tp), probe_offset(tp)); + seq_printf(m, " %s+%u", probe_symbol(tp), probe_offset(tp)); else seq_printf(m, " 0x%p", probe_address(tp)); -- cgit v0.10.2 From 4a846b443b4e8633057946a2234e23559a67ce42 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 11 Sep 2009 05:31:21 +0200 Subject: tracing/kprobes: Cleanup kprobe tracer code. Simplify trace_probe to remove a union, and remove some redundant wrappers. And also, cleanup create_trace_probe() function. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090910235322.22412.52525.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c24b7e9..4ce728c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -180,10 +180,7 @@ static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) struct trace_probe { struct list_head list; - union { - struct kprobe kp; - struct kretprobe rp; - }; + struct kretprobe rp; /* Use rp.kp for kprobe use */ unsigned long nhit; const char *symbol; /* symbol name */ struct ftrace_event_call call; @@ -202,7 +199,7 @@ static int kretprobe_trace_func(struct kretprobe_instance *ri, static __kprobes int probe_is_return(struct trace_probe *tp) { - return (tp->rp.handler == kretprobe_trace_func); + return tp->rp.handler != NULL; } static __kprobes const char *probe_symbol(struct trace_probe *tp) @@ -210,16 +207,6 @@ static __kprobes const char *probe_symbol(struct trace_probe *tp) return tp->symbol ? tp->symbol : "unknown"; } -static __kprobes unsigned int probe_offset(struct trace_probe *tp) -{ - return (probe_is_return(tp)) ? tp->rp.kp.offset : tp->kp.offset; -} - -static __kprobes void *probe_address(struct trace_probe *tp) -{ - return (probe_is_return(tp)) ? tp->rp.kp.addr : tp->kp.addr; -} - static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) { int ret = -EINVAL; @@ -269,8 +256,14 @@ static void unregister_probe_event(struct trace_probe *tp); static DEFINE_MUTEX(probe_lock); static LIST_HEAD(probe_list); -static struct trace_probe *alloc_trace_probe(const char *symbol, - const char *event, int nargs) +/* + * Allocate new trace_probe and initialize it (including kprobes). + */ +static struct trace_probe *alloc_trace_probe(const char *event, + void *addr, + const char *symbol, + unsigned long offs, + int nargs, int is_return) { struct trace_probe *tp; @@ -282,7 +275,16 @@ static struct trace_probe *alloc_trace_probe(const char *symbol, tp->symbol = kstrdup(symbol, GFP_KERNEL); if (!tp->symbol) goto error; - } + tp->rp.kp.symbol_name = tp->symbol; + tp->rp.kp.offset = offs; + } else + tp->rp.kp.addr = addr; + + if (is_return) + tp->rp.handler = kretprobe_trace_func; + else + tp->rp.kp.pre_handler = kprobe_trace_func; + if (!event) goto error; tp->call.name = kstrdup(event, GFP_KERNEL); @@ -327,7 +329,7 @@ static void __unregister_trace_probe(struct trace_probe *tp) if (probe_is_return(tp)) unregister_kretprobe(&tp->rp); else - unregister_kprobe(&tp->kp); + unregister_kprobe(&tp->rp.kp); } /* Unregister a trace_probe and probe_event: call with locking probe_lock */ @@ -349,14 +351,14 @@ static int register_trace_probe(struct trace_probe *tp) if (probe_is_return(tp)) ret = register_kretprobe(&tp->rp); else - ret = register_kprobe(&tp->kp); + ret = register_kprobe(&tp->rp.kp); if (ret) { pr_warning("Could not insert probe(%d)\n", ret); if (ret == -EILSEQ) { pr_warning("Probing address(0x%p) is not an " "instruction boundary.\n", - probe_address(tp)); + tp->rp.kp.addr); ret = -EINVAL; } goto end; @@ -530,12 +532,12 @@ static int create_trace_probe(int argc, char **argv) * +|-offs(ARG) : fetch memory at ARG +|- offs address. */ struct trace_probe *tp; - struct kprobe *kp; int i, ret = 0; int is_return = 0; char *symbol = NULL, *event = NULL; unsigned long offset = 0; void *addr = NULL; + char buf[MAX_EVENT_NAME_LEN]; if (argc < 2) return -EINVAL; @@ -577,33 +579,18 @@ static int create_trace_probe(int argc, char **argv) /* setup a probe */ if (!event) { /* Make a new event name */ - char buf[MAX_EVENT_NAME_LEN]; if (symbol) snprintf(buf, MAX_EVENT_NAME_LEN, "%c@%s%+ld", is_return ? 'r' : 'p', symbol, offset); else snprintf(buf, MAX_EVENT_NAME_LEN, "%c@0x%p", is_return ? 'r' : 'p', addr); - tp = alloc_trace_probe(symbol, buf, argc); - } else - tp = alloc_trace_probe(symbol, event, argc); + event = buf; + } + tp = alloc_trace_probe(event, addr, symbol, offset, argc, is_return); if (IS_ERR(tp)) return PTR_ERR(tp); - if (is_return) { - kp = &tp->rp.kp; - tp->rp.handler = kretprobe_trace_func; - } else { - kp = &tp->kp; - tp->kp.pre_handler = kprobe_trace_func; - } - - if (tp->symbol) { - kp->symbol_name = tp->symbol; - kp->offset = (unsigned int)offset; - } else - kp->addr = addr; - /* parse arguments */ ret = 0; for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { @@ -670,9 +657,9 @@ static int probes_seq_show(struct seq_file *m, void *v) seq_printf(m, ":%s", tp->call.name); if (tp->symbol) - seq_printf(m, " %s+%u", probe_symbol(tp), probe_offset(tp)); + seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); else - seq_printf(m, " 0x%p", probe_address(tp)); + seq_printf(m, " 0x%p", tp->rp.kp.addr); for (i = 0; i < tp->nr_args; i++) { ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); @@ -783,7 +770,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) struct trace_probe *tp = v; seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit, - probe_is_return(tp) ? tp->rp.kp.nmissed : tp->kp.nmissed); + tp->rp.kp.nmissed); return 0; } @@ -811,7 +798,7 @@ static const struct file_operations kprobe_profile_ops = { /* Kprobe handler */ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) { - struct trace_probe *tp = container_of(kp, struct trace_probe, kp); + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct kprobe_trace_entry *entry; struct ring_buffer_event *event; struct ring_buffer *buffer; @@ -866,7 +853,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, entry = ring_buffer_event_data(event); entry->nargs = tp->nr_args; - entry->func = (unsigned long)probe_address(tp); + entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i], regs); @@ -945,7 +932,7 @@ static int probe_event_enable(struct ftrace_event_call *call) if (probe_is_return(tp)) return enable_kretprobe(&tp->rp); else - return enable_kprobe(&tp->kp); + return enable_kprobe(&tp->rp.kp); } static void probe_event_disable(struct ftrace_event_call *call) @@ -955,7 +942,7 @@ static void probe_event_disable(struct ftrace_event_call *call) if (probe_is_return(tp)) disable_kretprobe(&tp->rp); else - disable_kprobe(&tp->kp); + disable_kprobe(&tp->rp.kp); } static int probe_event_raw_init(struct ftrace_event_call *event_call) -- cgit v0.10.2 From e08d1c657f70bcaca11401cd6ac5c8fe59bd2bb7 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2009 19:53:30 -0400 Subject: tracing/kprobes: Add event profiling support Add *probe_profile_enable/disable to support kprobes raw events sampling from perf counters, like other ftrace events, when CONFIG_PROFILE_EVENT=y. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090910235329.22412.94731.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index db55318..8f882eb 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -62,13 +62,15 @@ enabled: You can enable/disable the probe by writing 1 or 0 on it. format: - It shows the format of this probe event. It also shows aliases of arguments + This shows the format of this probe event. It also shows aliases of arguments which you specified to kprobe_events. filter: You can write filtering rules of this event. And you can use both of aliase names and field names for describing filters. +id: + This shows the id of this probe event. Event Profiling --------------- diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 4ce728c..730e992 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "trace.h" #include "trace_output.h" @@ -280,6 +281,7 @@ static struct trace_probe *alloc_trace_probe(const char *event, } else tp->rp.kp.addr = addr; + /* Set handler here for checking whether this probe is return or not. */ if (is_return) tp->rp.handler = kretprobe_trace_func; else @@ -929,10 +931,13 @@ static int probe_event_enable(struct ftrace_event_call *call) { struct trace_probe *tp = (struct trace_probe *)call->data; - if (probe_is_return(tp)) + if (probe_is_return(tp)) { + tp->rp.handler = kretprobe_trace_func; return enable_kretprobe(&tp->rp); - else + } else { + tp->rp.kp.pre_handler = kprobe_trace_func; return enable_kprobe(&tp->rp.kp); + } } static void probe_event_disable(struct ftrace_event_call *call) @@ -1105,6 +1110,101 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, "func, ret_ip"); } +#ifdef CONFIG_EVENT_PROFILE + +/* Kprobe profile handler */ +static __kprobes int kprobe_profile_func(struct kprobe *kp, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + struct ftrace_event_call *call = &tp->call; + struct kprobe_trace_entry *entry; + int size, i, pc; + unsigned long irq_flags; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + + do { + char raw_data[size]; + struct trace_entry *ent; + + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + entry = (struct kprobe_trace_entry *)raw_data; + ent = &entry->ent; + + tracing_generic_entry_update(ent, irq_flags, pc); + ent->type = call->id; + entry->nargs = tp->nr_args; + entry->ip = (unsigned long)kp->addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i], regs); + perf_tpcounter_event(call->id, entry->ip, 1, entry, size); + } while (0); + return 0; +} + +/* Kretprobe profile handler */ +static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + struct ftrace_event_call *call = &tp->call; + struct kretprobe_trace_entry *entry; + int size, i, pc; + unsigned long irq_flags; + + local_save_flags(irq_flags); + pc = preempt_count(); + + size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + + do { + char raw_data[size]; + struct trace_entry *ent; + + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + entry = (struct kretprobe_trace_entry *)raw_data; + ent = &entry->ent; + + tracing_generic_entry_update(ent, irq_flags, pc); + ent->type = call->id; + entry->nargs = tp->nr_args; + entry->func = (unsigned long)tp->rp.kp.addr; + entry->ret_ip = (unsigned long)ri->ret_addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i], regs); + perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size); + } while (0); + return 0; +} + +static int probe_profile_enable(struct ftrace_event_call *call) +{ + struct trace_probe *tp = (struct trace_probe *)call->data; + + if (atomic_inc_return(&call->profile_count)) + return 0; + + if (probe_is_return(tp)) { + tp->rp.handler = kretprobe_profile_func; + return enable_kretprobe(&tp->rp); + } else { + tp->rp.kp.pre_handler = kprobe_profile_func; + return enable_kprobe(&tp->rp.kp); + } +} + +static void probe_profile_disable(struct ftrace_event_call *call) +{ + if (atomic_add_negative(-1, &call->profile_count)) + probe_event_disable(call); +} + +#endif /* CONFIG_EVENT_PROFILE */ + static int register_probe_event(struct trace_probe *tp) { struct ftrace_event_call *call = &tp->call; @@ -1130,6 +1230,12 @@ static int register_probe_event(struct trace_probe *tp) call->enabled = 1; call->regfunc = probe_event_enable; call->unregfunc = probe_event_disable; + +#ifdef CONFIG_EVENT_PROFILE + atomic_set(&call->profile_count, -1); + call->profile_enable = probe_profile_enable; + call->profile_disable = probe_profile_disable; +#endif call->data = tp; ret = trace_add_event_call(call); if (ret) { -- cgit v0.10.2 From eca0d916f6429785bbc88db3ff66631cde62b432 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2009 19:53:38 -0400 Subject: tracing/kprobes: Add argument name support Add argument name assignment support and remove "alias" lines from format. This allows user to assign unique name to each argument. For example, $ echo p do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 > kprobe_events This assigns dfd, filename, flags, and mode to 1st - 4th arguments respectively. Trace buffer shows those names too. <...>-1439 [000] 1200885.933147: do_sys_open+0x0/0xdf: dfd=ffffff9c filename=bfa898ac flags=8000 mode=0 This helps users to know what each value means. Users can filter each events by these names too. Note that you can not filter by argN anymore. Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090910235337.22412.77383.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 8f882eb..aaa6c10 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -42,7 +42,8 @@ Synopsis of kprobe_events aN : Fetch function argument. (N >= 0)(*) rv : Fetch return value.(**) ra : Fetch return address.(**) - +|-offs(FETCHARG) : fetch memory at FETCHARG +|- offs address.(***) + +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) + NAME=FETCHARG: Set NAME as the argument name of FETCHARG. (*) aN may not correct on asmlinkaged functions and at the middle of function body. @@ -62,12 +63,10 @@ enabled: You can enable/disable the probe by writing 1 or 0 on it. format: - This shows the format of this probe event. It also shows aliases of arguments - which you specified to kprobe_events. + This shows the format of this probe event. filter: - You can write filtering rules of this event. And you can use both of aliase - names and field names for describing filters. + You can write filtering rules of this event. id: This shows the id of this probe event. @@ -85,10 +84,11 @@ Usage examples To add a probe as a new event, write a new definition to kprobe_events as below. - echo p:myprobe do_sys_open a0 a1 a2 a3 > /sys/kernel/debug/tracing/kprobe_events + echo p:myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 > /sys/kernel/debug/tracing/kprobe_events This sets a kprobe on the top of do_sys_open() function with recording -1st to 4th arguments as "myprobe" event. +1st to 4th arguments as "myprobe" event. As this example shows, users can +choose more familiar names for each arguments. echo r:myretprobe do_sys_open rv ra >> /sys/kernel/debug/tracing/kprobe_events @@ -99,7 +99,7 @@ recording return value and return address as "myretprobe" event. cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format name: myprobe -ID: 23 +ID: 75 format: field:unsigned short common_type; offset:0; size:2; field:unsigned char common_flags; offset:2; size:1; @@ -109,21 +109,15 @@ format: field: unsigned long ip; offset:16;tsize:8; field: int nargs; offset:24;tsize:4; - field: unsigned long arg0; offset:32;tsize:8; - field: unsigned long arg1; offset:40;tsize:8; - field: unsigned long arg2; offset:48;tsize:8; - field: unsigned long arg3; offset:56;tsize:8; + field: unsigned long dfd; offset:32;tsize:8; + field: unsigned long filename; offset:40;tsize:8; + field: unsigned long flags; offset:48;tsize:8; + field: unsigned long mode; offset:56;tsize:8; - alias: a0; original: arg0; - alias: a1; original: arg1; - alias: a2; original: arg2; - alias: a3; original: arg3; +print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->filename, REC->flags, REC->mode -print fmt: "%lx: 0x%lx 0x%lx 0x%lx 0x%lx", ip, arg0, arg1, arg2, arg3 - - You can see that the event has 4 arguments and alias expressions -corresponding to it. + You can see that the event has 4 arguments as in the expressions you specified. echo > /sys/kernel/debug/tracing/kprobe_events @@ -135,12 +129,12 @@ corresponding to it. # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | - <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: 0x3 0x7fffd1ec4440 0x8000 0x0 - <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: 0xfffffffffffffffe 0xffffffff81367a3a - <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: 0xffffff9c 0x40413c 0x8000 0x1b6 - <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a - <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: 0xffffff9c 0x4041c6 0x98800 0x10 - <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: 0x3 0xffffffff81367a3a + <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 + <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: rv=fffffffffffffffe ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=40413c flags=8000 mode=1b6 + <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=4041c6 flags=98800 mode=10 + <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a Each line shows when the kernel hits a probe, and <- SYMBOL means kernel diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 730e992..44dad1a 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -176,9 +176,14 @@ static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) } /** - * kprobe_trace_core + * Kprobe tracer core functions */ +struct probe_arg { + struct fetch_func fetch; + const char *name; +}; + struct trace_probe { struct list_head list; struct kretprobe rp; /* Use rp.kp for kprobe use */ @@ -187,12 +192,12 @@ struct trace_probe { struct ftrace_event_call call; struct trace_event event; unsigned int nr_args; - struct fetch_func args[]; + struct probe_arg args[]; }; #define SIZEOF_TRACE_PROBE(n) \ (offsetof(struct trace_probe, args) + \ - (sizeof(struct fetch_func) * (n))) + (sizeof(struct probe_arg) * (n))) static int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs); static int kretprobe_trace_func(struct kretprobe_instance *ri, @@ -301,15 +306,21 @@ error: return ERR_PTR(-ENOMEM); } +static void free_probe_arg(struct probe_arg *arg) +{ + if (arg->fetch.func == fetch_symbol) + free_symbol_cache(arg->fetch.data); + else if (arg->fetch.func == fetch_indirect) + free_indirect_fetch_data(arg->fetch.data); + kfree(arg->name); +} + static void free_trace_probe(struct trace_probe *tp) { int i; for (i = 0; i < tp->nr_args; i++) - if (tp->args[i].func == fetch_symbol) - free_symbol_cache(tp->args[i].data); - else if (tp->args[i].func == fetch_indirect) - free_indirect_fetch_data(tp->args[i].data); + free_probe_arg(&tp->args[i]); kfree(tp->call.name); kfree(tp->symbol); @@ -532,11 +543,13 @@ static int create_trace_probe(int argc, char **argv) * %REG : fetch register REG * Indirect memory fetch: * +|-offs(ARG) : fetch memory at ARG +|- offs address. + * Alias name of args: + * NAME=FETCHARG : set NAME as alias of FETCHARG. */ struct trace_probe *tp; int i, ret = 0; int is_return = 0; - char *symbol = NULL, *event = NULL; + char *symbol = NULL, *event = NULL, *arg = NULL; unsigned long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; @@ -596,12 +609,21 @@ static int create_trace_probe(int argc, char **argv) /* parse arguments */ ret = 0; for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { - if (strlen(argv[i]) > MAX_ARGSTR_LEN) { - pr_info("Argument%d(%s) is too long.\n", i, argv[i]); + /* Parse argument name */ + arg = strchr(argv[i], '='); + if (arg) + *arg++ = '\0'; + else + arg = argv[i]; + tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); + + /* Parse fetch argument */ + if (strlen(arg) > MAX_ARGSTR_LEN) { + pr_info("Argument%d(%s) is too long.\n", i, arg); ret = -ENOSPC; goto error; } - ret = parse_probe_arg(argv[i], &tp->args[i], is_return); + ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); if (ret) goto error; } @@ -664,12 +686,12 @@ static int probes_seq_show(struct seq_file *m, void *v) seq_printf(m, " 0x%p", tp->rp.kp.addr); for (i = 0; i < tp->nr_args; i++) { - ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); + ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); if (ret < 0) { pr_warning("Argument%d decoding error(%d).\n", i, ret); return ret; } - seq_printf(m, " %s", buf); + seq_printf(m, " %s=%s", tp->args[i].name, buf); } seq_printf(m, "\n"); return 0; @@ -824,7 +846,7 @@ static __kprobes int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs) entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i], regs); + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); if (!filter_current_check_discard(buffer, call, entry, event)) trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); @@ -858,7 +880,7 @@ static __kprobes int kretprobe_trace_func(struct kretprobe_instance *ri, entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i], regs); + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); if (!filter_current_check_discard(buffer, call, entry, event)) trace_nowake_buffer_unlock_commit(buffer, event, irq_flags, pc); @@ -872,9 +894,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags) { struct kprobe_trace_entry *field; struct trace_seq *s = &iter->seq; + struct trace_event *event; + struct trace_probe *tp; int i; field = (struct kprobe_trace_entry *)iter->ent; + event = ftrace_find_event(field->ent.type); + tp = container_of(event, struct trace_probe, event); if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) goto partial; @@ -883,7 +909,8 @@ print_kprobe_event(struct trace_iterator *iter, int flags) goto partial; for (i = 0; i < field->nargs; i++) - if (!trace_seq_printf(s, " 0x%lx", field->args[i])) + if (!trace_seq_printf(s, " %s=%lx", + tp->args[i].name, field->args[i])) goto partial; if (!trace_seq_puts(s, "\n")) @@ -899,9 +926,13 @@ print_kretprobe_event(struct trace_iterator *iter, int flags) { struct kretprobe_trace_entry *field; struct trace_seq *s = &iter->seq; + struct trace_event *event; + struct trace_probe *tp; int i; field = (struct kretprobe_trace_entry *)iter->ent; + event = ftrace_find_event(field->ent.type); + tp = container_of(event, struct trace_probe, event); if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) goto partial; @@ -916,7 +947,8 @@ print_kretprobe_event(struct trace_iterator *iter, int flags) goto partial; for (i = 0; i < field->nargs; i++) - if (!trace_seq_printf(s, " 0x%lx", field->args[i])) + if (!trace_seq_printf(s, " %s=%lx", + tp->args[i].name, field->args[i])) goto partial; if (!trace_seq_puts(s, "\n")) @@ -972,7 +1004,6 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) { int ret, i; struct kprobe_trace_entry field; - char buf[MAX_ARGSTR_LEN + 1]; struct trace_probe *tp = (struct trace_probe *)event_call->data; ret = trace_define_common_fields(event_call); @@ -981,16 +1012,9 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) DEFINE_FIELD(unsigned long, ip, "ip", 0); DEFINE_FIELD(int, nargs, "nargs", 1); - for (i = 0; i < tp->nr_args; i++) { - /* Set argN as a field */ - sprintf(buf, "arg%d", i); - DEFINE_FIELD(unsigned long, args[i], buf, 0); - /* Set argument string as an alias field */ - ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); - if (ret < 0) - return ret; - DEFINE_FIELD(unsigned long, args[i], buf, 0); - } + /* Set argument names as fields */ + for (i = 0; i < tp->nr_args; i++) + DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); return 0; } @@ -998,7 +1022,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) { int ret, i; struct kretprobe_trace_entry field; - char buf[MAX_ARGSTR_LEN + 1]; struct trace_probe *tp = (struct trace_probe *)event_call->data; ret = trace_define_common_fields(event_call); @@ -1008,16 +1031,9 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) DEFINE_FIELD(unsigned long, func, "func", 0); DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0); DEFINE_FIELD(int, nargs, "nargs", 1); - for (i = 0; i < tp->nr_args; i++) { - /* Set argN as a field */ - sprintf(buf, "arg%d", i); - DEFINE_FIELD(unsigned long, args[i], buf, 0); - /* Set argument string as an alias field */ - ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); - if (ret < 0) - return ret; - DEFINE_FIELD(unsigned long, args[i], buf, 0); - } + /* Set argument names as fields */ + for (i = 0; i < tp->nr_args; i++) + DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); return 0; } @@ -1025,31 +1041,21 @@ static int __probe_event_show_format(struct trace_seq *s, struct trace_probe *tp, const char *fmt, const char *arg) { - int i, ret; - char buf[MAX_ARGSTR_LEN + 1]; + int i; - /* Show aliases */ - for (i = 0; i < tp->nr_args; i++) { - ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i]); - if (ret < 0) - return ret; - if (!trace_seq_printf(s, "\talias: %s;\toriginal: arg%d;\n", - buf, i)) - return 0; - } /* Show format */ if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt)) return 0; for (i = 0; i < tp->nr_args; i++) - if (!trace_seq_puts(s, " 0x%lx")) + if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) return 0; if (!trace_seq_printf(s, "\", %s", arg)) return 0; for (i = 0; i < tp->nr_args; i++) - if (!trace_seq_printf(s, ", arg%d", i)) + if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name)) return 0; return trace_seq_puts(s, "\n"); @@ -1071,17 +1077,14 @@ static int kprobe_event_show_format(struct ftrace_event_call *call, { struct kprobe_trace_entry field __attribute__((unused)); int ret, i; - char buf[8]; struct trace_probe *tp = (struct trace_probe *)call->data; SHOW_FIELD(unsigned long, ip, "ip"); SHOW_FIELD(int, nargs, "nargs"); /* Show fields */ - for (i = 0; i < tp->nr_args; i++) { - sprintf(buf, "arg%d", i); - SHOW_FIELD(unsigned long, args[i], buf); - } + for (i = 0; i < tp->nr_args; i++) + SHOW_FIELD(unsigned long, args[i], tp->args[i].name); trace_seq_puts(s, "\n"); return __probe_event_show_format(s, tp, "%lx:", "ip"); @@ -1092,7 +1095,6 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, { struct kretprobe_trace_entry field __attribute__((unused)); int ret, i; - char buf[8]; struct trace_probe *tp = (struct trace_probe *)call->data; SHOW_FIELD(unsigned long, func, "func"); @@ -1100,10 +1102,8 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, SHOW_FIELD(int, nargs, "nargs"); /* Show fields */ - for (i = 0; i < tp->nr_args; i++) { - sprintf(buf, "arg%d", i); - SHOW_FIELD(unsigned long, args[i], buf); - } + for (i = 0; i < tp->nr_args; i++) + SHOW_FIELD(unsigned long, args[i], tp->args[i].name); trace_seq_puts(s, "\n"); return __probe_event_show_format(s, tp, "%lx <- %lx:", @@ -1140,7 +1140,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, entry->nargs = tp->nr_args; entry->ip = (unsigned long)kp->addr; for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i], regs); + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tpcounter_event(call->id, entry->ip, 1, entry, size); } while (0); return 0; @@ -1175,7 +1175,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, entry->func = (unsigned long)tp->rp.kp.addr; entry->ret_ip = (unsigned long)ri->ret_addr; for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i], regs); + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tpcounter_event(call->id, entry->ret_ip, 1, entry, size); } while (0); return 0; -- cgit v0.10.2 From 6e9f23d1619f7badaf9090dac09e86a22d6061d8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2009 19:53:45 -0400 Subject: tracing/kprobes: Show event name in trace output Show event name in tracing/trace output. This also fixes kprobes events format to comply with other tracepoint events formats. Before patching: <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: ... <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: ... After patching: <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) ... <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) ... Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090910235345.22412.76527.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index aaa6c10..a849889 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -114,7 +114,7 @@ format: field: unsigned long flags; offset:48;tsize:8; field: unsigned long mode; offset:56;tsize:8; -print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->filename, REC->flags, REC->mode +print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, REC->filename, REC->flags, REC->mode You can see that the event has 4 arguments as in the expressions you specified. @@ -129,15 +129,15 @@ print fmt: "%lx: dfd=%lx filename=%lx flags=%lx mode=%lx", ip, REC->dfd, REC->fi # # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | - <...>-1447 [001] 1038282.286875: do_sys_open+0x0/0xd6: dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 - <...>-1447 [001] 1038282.286878: sys_openat+0xc/0xe <- do_sys_open: rv=fffffffffffffffe ra=ffffffff81367a3a - <...>-1447 [001] 1038282.286885: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=40413c flags=8000 mode=1b6 - <...>-1447 [001] 1038282.286915: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a - <...>-1447 [001] 1038282.286969: do_sys_open+0x0/0xd6: dfd=ffffff9c filename=4041c6 flags=98800 mode=10 - <...>-1447 [001] 1038282.286976: sys_open+0x1b/0x1d <- do_sys_open: rv=3 ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 + <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) rv=fffffffffffffffe ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 + <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=3 ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 + <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=3 ra=ffffffff81367a3a - Each line shows when the kernel hits a probe, and <- SYMBOL means kernel + Each line shows when the kernel hits an event, and <- SYMBOL means kernel returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel returns from do_sys_open to sys_open+0x1b). diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 44dad1a..1746afe 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -902,10 +902,13 @@ print_kprobe_event(struct trace_iterator *iter, int flags) event = ftrace_find_event(field->ent.type); tp = container_of(event, struct trace_probe, event); + if (!trace_seq_printf(s, "%s: (", tp->call.name)) + goto partial; + if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) goto partial; - if (!trace_seq_puts(s, ":")) + if (!trace_seq_puts(s, ")")) goto partial; for (i = 0; i < field->nargs; i++) @@ -934,6 +937,9 @@ print_kretprobe_event(struct trace_iterator *iter, int flags) event = ftrace_find_event(field->ent.type); tp = container_of(event, struct trace_probe, event); + if (!trace_seq_printf(s, "%s: (", tp->call.name)) + goto partial; + if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) goto partial; @@ -943,7 +949,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags) if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) goto partial; - if (!trace_seq_puts(s, ":")) + if (!trace_seq_puts(s, ")")) goto partial; for (i = 0; i < field->nargs; i++) @@ -1087,7 +1093,7 @@ static int kprobe_event_show_format(struct ftrace_event_call *call, SHOW_FIELD(unsigned long, args[i], tp->args[i].name); trace_seq_puts(s, "\n"); - return __probe_event_show_format(s, tp, "%lx:", "ip"); + return __probe_event_show_format(s, tp, "(%lx)", "REC->ip"); } static int kretprobe_event_show_format(struct ftrace_event_call *call, @@ -1106,8 +1112,8 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, SHOW_FIELD(unsigned long, args[i], tp->args[i].name); trace_seq_puts(s, "\n"); - return __probe_event_show_format(s, tp, "%lx <- %lx:", - "func, ret_ip"); + return __probe_event_show_format(s, tp, "(%lx <- %lx)", + "REC->func, REC->ret_ip"); } #ifdef CONFIG_EVENT_PROFILE -- cgit v0.10.2 From b8a4754147d61f5359a765a3afd3eb03012aa052 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 30 Jul 2009 11:10:02 +0200 Subject: x86, msr: Unify rdmsr_on_cpus/wrmsr_on_cpus Since rdmsr_on_cpus and wrmsr_on_cpus are almost identical, unify them into a common __rwmsr_on_cpus helper thus avoiding code duplication. While at it, convert cpumask_t's to const struct cpumask *. Signed-off-by: Borislav Petkov Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 7e2b6ba..9a00219 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -247,8 +247,8 @@ do { \ #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8]); diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 33a1e3c..41628b1 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -71,14 +71,9 @@ int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) } EXPORT_SYMBOL(wrmsr_on_cpu); -/* rdmsr on a bunch of CPUs - * - * @mask: which CPUs - * @msr_no: which MSR - * @msrs: array of MSR values - * - */ -void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, + struct msr *msrs, + void (*msr_func) (void *info)) { struct msr_info rv; int this_cpu; @@ -92,11 +87,23 @@ void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) this_cpu = get_cpu(); if (cpumask_test_cpu(this_cpu, mask)) - __rdmsr_on_cpu(&rv); + msr_func(&rv); - smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); + smp_call_function_many(mask, msr_func, &rv, 1); put_cpu(); } + +/* rdmsr on a bunch of CPUs + * + * @mask: which CPUs + * @msr_no: which MSR + * @msrs: array of MSR values + * + */ +void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) +{ + __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu); +} EXPORT_SYMBOL(rdmsr_on_cpus); /* @@ -107,24 +114,9 @@ EXPORT_SYMBOL(rdmsr_on_cpus); * @msrs: array of MSR values * */ -void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) +void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) { - struct msr_info rv; - int this_cpu; - - memset(&rv, 0, sizeof(rv)); - - rv.off = cpumask_first(mask); - rv.msrs = msrs; - rv.msr_no = msr_no; - - this_cpu = get_cpu(); - - if (cpumask_test_cpu(this_cpu, mask)) - __wrmsr_on_cpu(&rv); - - smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); - put_cpu(); + __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu); } EXPORT_SYMBOL(wrmsr_on_cpus); -- cgit v0.10.2 From f52487e9c0041842eeb77c6c48774414b1cede08 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 10 Sep 2009 19:53:53 -0400 Subject: tracing/kprobes: Support custom subsystem for each kprobe event Support specifying a custom subsystem(group) for each kprobe event. This allows users to create new group to control several probes at once, or add events to existing groups as additional tracepoints. New synopsis: p[:[subsys/]event-name] KADDR|KSYM[+offs] [ARGS] Signed-off-by: Masami Hiramatsu Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090910235353.22412.15149.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index a849889..6521681 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -25,9 +25,10 @@ probe events via /sys/kernel/debug/tracing/events/kprobes//filter. Synopsis of kprobe_events ------------------------- - p[:EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe - r[:EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe + r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe + GRP : Group name. If omitted, use "kprobes" for it. EVENT : Event name. If omitted, the event name is generated based on SYMBOL+offs or MEMADDR. SYMBOL[+offs] : Symbol+offset where the probe is inserted. diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1746afe..cbc0870 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -36,6 +36,7 @@ #define MAX_TRACE_ARGS 128 #define MAX_ARGSTR_LEN 63 #define MAX_EVENT_NAME_LEN 64 +#define KPROBE_EVENT_SYSTEM "kprobes" /* currently, trace_kprobe only supports X86. */ @@ -265,7 +266,8 @@ static LIST_HEAD(probe_list); /* * Allocate new trace_probe and initialize it (including kprobes). */ -static struct trace_probe *alloc_trace_probe(const char *event, +static struct trace_probe *alloc_trace_probe(const char *group, + const char *event, void *addr, const char *symbol, unsigned long offs, @@ -298,9 +300,16 @@ static struct trace_probe *alloc_trace_probe(const char *event, if (!tp->call.name) goto error; + if (!group) + goto error; + tp->call.system = kstrdup(group, GFP_KERNEL); + if (!tp->call.system) + goto error; + INIT_LIST_HEAD(&tp->list); return tp; error: + kfree(tp->call.name); kfree(tp->symbol); kfree(tp); return ERR_PTR(-ENOMEM); @@ -322,6 +331,7 @@ static void free_trace_probe(struct trace_probe *tp) for (i = 0; i < tp->nr_args; i++) free_probe_arg(&tp->args[i]); + kfree(tp->call.system); kfree(tp->call.name); kfree(tp->symbol); kfree(tp); @@ -530,8 +540,8 @@ static int create_trace_probe(int argc, char **argv) { /* * Argument syntax: - * - Add kprobe: p[:EVENT] SYMBOL[+OFFS]|ADDRESS [FETCHARGS] - * - Add kretprobe: r[:EVENT] SYMBOL[+0] [FETCHARGS] + * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] + * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] * Fetch args: * aN : fetch Nth of function argument. (N:0-) * rv : fetch return value @@ -549,7 +559,7 @@ static int create_trace_probe(int argc, char **argv) struct trace_probe *tp; int i, ret = 0; int is_return = 0; - char *symbol = NULL, *event = NULL, *arg = NULL; + char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; unsigned long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; @@ -566,6 +576,15 @@ static int create_trace_probe(int argc, char **argv) if (argv[0][1] == ':') { event = &argv[0][2]; + if (strchr(event, '/')) { + group = event; + event = strchr(group, '/') + 1; + event[-1] = '\0'; + if (strlen(group) == 0) { + pr_info("Group name is not specifiled\n"); + return -EINVAL; + } + } if (strlen(event) == 0) { pr_info("Event name is not specifiled\n"); return -EINVAL; @@ -592,6 +611,8 @@ static int create_trace_probe(int argc, char **argv) argc -= 2; argv += 2; /* setup a probe */ + if (!group) + group = KPROBE_EVENT_SYSTEM; if (!event) { /* Make a new event name */ if (symbol) @@ -602,7 +623,8 @@ static int create_trace_probe(int argc, char **argv) is_return ? 'r' : 'p', addr); event = buf; } - tp = alloc_trace_probe(event, addr, symbol, offset, argc, is_return); + tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, + is_return); if (IS_ERR(tp)) return PTR_ERR(tp); @@ -1217,7 +1239,6 @@ static int register_probe_event(struct trace_probe *tp) int ret; /* Initialize ftrace_event_call */ - call->system = "kprobes"; if (probe_is_return(tp)) { tp->event.trace = print_kretprobe_event; call->raw_init = probe_event_raw_init; -- cgit v0.10.2 From 2d5e067edc4635ff7515bfa9ab3edb38bc344cab Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 14 Sep 2009 16:48:56 -0400 Subject: tracing/kprobes: Fix trace_probe registration order Fix trace_probe registration order. ftrace_event_call and ftrace_event must be registered before kprobe/kretprobe, because tracing/profiling handlers dereference the event-id. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090914204856.18779.52961.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index cbc0870..ea0db8e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -347,20 +347,15 @@ static struct trace_probe *find_probe_event(const char *event) return NULL; } -static void __unregister_trace_probe(struct trace_probe *tp) +/* Unregister a trace_probe and probe_event: call with locking probe_lock */ +static void unregister_trace_probe(struct trace_probe *tp) { if (probe_is_return(tp)) unregister_kretprobe(&tp->rp); else unregister_kprobe(&tp->rp.kp); -} - -/* Unregister a trace_probe and probe_event: call with locking probe_lock */ -static void unregister_trace_probe(struct trace_probe *tp) -{ - unregister_probe_event(tp); - __unregister_trace_probe(tp); list_del(&tp->list); + unregister_probe_event(tp); } /* Register a trace_probe and probe_event */ @@ -371,6 +366,19 @@ static int register_trace_probe(struct trace_probe *tp) mutex_lock(&probe_lock); + /* register as an event */ + old_tp = find_probe_event(tp->call.name); + if (old_tp) { + /* delete old event */ + unregister_trace_probe(old_tp); + free_trace_probe(old_tp); + } + ret = register_probe_event(tp); + if (ret) { + pr_warning("Faild to register probe event(%d)\n", ret); + goto end; + } + if (probe_is_return(tp)) ret = register_kretprobe(&tp->rp); else @@ -384,21 +392,9 @@ static int register_trace_probe(struct trace_probe *tp) tp->rp.kp.addr); ret = -EINVAL; } - goto end; - } - /* register as an event */ - old_tp = find_probe_event(tp->call.name); - if (old_tp) { - /* delete old event */ - unregister_trace_probe(old_tp); - free_trace_probe(old_tp); - } - ret = register_probe_event(tp); - if (ret) { - pr_warning("Faild to register probe event(%d)\n", ret); - __unregister_trace_probe(tp); - } - list_add_tail(&tp->list, &probe_list); + unregister_probe_event(tp); + } else + list_add_tail(&tp->list, &probe_list); end: mutex_unlock(&probe_lock); return ret; -- cgit v0.10.2 From 588bebb74fe87270f94c2810652bd683d63c4b54 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 16 Sep 2009 11:42:55 -0400 Subject: ftrace: Fix trace_add_event_call() to initialize list Handle failure path in trace_add_event_call() to fix the below bug which occurred when I tried to add invalid event twice. Could not create debugfs 'kmalloc' directory Failed to register kprobe event: kmalloc Faild to register probe event(-1) ------------[ cut here ]------------ WARNING: at /home/mhiramat/ksrc/random-tracing/lib/list_debug.c:26 __list_add+0x27/0x5c() Hardware name: list_add corruption. next->prev should be prev (c07d78cc), but was 00001000. (next=d854236c). Modules linked in: sunrpc uinput virtio_net virtio_balloon i2c_piix4 pcspkr i2c_core virtio_blk virtio_pci virtio_ring virtio [last unloaded: scsi_wait_scan] Pid: 1394, comm: tee Not tainted 2.6.31-rc9 #51 Call Trace: [] warn_slowpath_common+0x65/0x7c [] ? __list_add+0x27/0x5c [] warn_slowpath_fmt+0x24/0x27 [] __list_add+0x27/0x5c [] list_add+0xa/0xc [] trace_add_event_call+0x60/0x97 [] command_trace_probe+0x42c/0x51b [] ? remove_wait_queue+0x22/0x27 [] ? __wake_up+0x32/0x3b [] probes_write+0xd4/0x10a [] ? probes_write+0x0/0x10a [] vfs_write+0x80/0xdf [] sys_write+0x3b/0x5d [] syscall_call+0x7/0xb ---[ end trace 2b962b5dc1fdc07d ]--- Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <4AB1077F.6020107@redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ba34920..83cc2c0 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1010,9 +1010,12 @@ static int __trace_add_event_call(struct ftrace_event_call *call) return -ENOENT; list_add(&call->list, &ftrace_events); - return event_create_dir(call, d_events, &ftrace_event_id_fops, + ret = event_create_dir(call, d_events, &ftrace_event_id_fops, &ftrace_enable_fops, &ftrace_event_filter_fops, &ftrace_event_format_fops); + if (ret < 0) + list_del(&call->list); + return ret; } /* Add an additional event_call dynamically */ -- cgit v0.10.2 From 4fead8e46fded93cc0d432ced774d9a3a8d21bad Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 14 Sep 2009 16:49:12 -0400 Subject: ftrace: Fix trace_remove_event_call() to lock trace_event_mutex Lock not only event_mutex but also trace_event_mutex in trace_remove_event_call() to protect __unregister_ftrace_event(). Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090914204912.18779.68734.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 83cc2c0..f85b0f1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1054,6 +1054,9 @@ static void remove_subsystem_dir(const char *name) } } +/* + * Must be called under locking both of event_mutex and trace_event_mutex. + */ static void __trace_remove_event_call(struct ftrace_event_call *call) { ftrace_event_enable_disable(call, 0); @@ -1070,7 +1073,9 @@ static void __trace_remove_event_call(struct ftrace_event_call *call) void trace_remove_event_call(struct ftrace_event_call *call) { mutex_lock(&event_mutex); + down_write(&trace_event_mutex); __trace_remove_event_call(call); + up_write(&trace_event_mutex); mutex_unlock(&event_mutex); } -- cgit v0.10.2 From 50d780560785b068c358675c5f0bf6c83b5c373e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 14 Sep 2009 16:49:20 -0400 Subject: tracing/kprobes: Add probe handler dispatcher to support perf and ftrace concurrent use Add kprobe_dispatcher and kretprobe_dispatcher to dispatch event in both profile and tracing handlers. This allows simultaneous kprobe uses by ftrace and perf. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090914204920.18779.57555.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ea0db8e..70b632c 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -185,10 +185,15 @@ struct probe_arg { const char *name; }; +/* Flags for trace_probe */ +#define TP_FLAG_TRACE 1 +#define TP_FLAG_PROFILE 2 + struct trace_probe { struct list_head list; struct kretprobe rp; /* Use rp.kp for kprobe use */ unsigned long nhit; + unsigned int flags; /* For TP_FLAG_* */ const char *symbol; /* symbol name */ struct ftrace_event_call call; struct trace_event event; @@ -200,10 +205,6 @@ struct trace_probe { (offsetof(struct trace_probe, args) + \ (sizeof(struct probe_arg) * (n))) -static int kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs); -static int kretprobe_trace_func(struct kretprobe_instance *ri, - struct pt_regs *regs); - static __kprobes int probe_is_return(struct trace_probe *tp) { return tp->rp.handler != NULL; @@ -263,6 +264,10 @@ static void unregister_probe_event(struct trace_probe *tp); static DEFINE_MUTEX(probe_lock); static LIST_HEAD(probe_list); +static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs); +static int kretprobe_dispatcher(struct kretprobe_instance *ri, + struct pt_regs *regs); + /* * Allocate new trace_probe and initialize it (including kprobes). */ @@ -288,11 +293,10 @@ static struct trace_probe *alloc_trace_probe(const char *group, } else tp->rp.kp.addr = addr; - /* Set handler here for checking whether this probe is return or not. */ if (is_return) - tp->rp.handler = kretprobe_trace_func; + tp->rp.handler = kretprobe_dispatcher; else - tp->rp.kp.pre_handler = kprobe_trace_func; + tp->rp.kp.pre_handler = kprobe_dispatcher; if (!event) goto error; @@ -379,6 +383,7 @@ static int register_trace_probe(struct trace_probe *tp) goto end; } + tp->flags = TP_FLAG_TRACE; if (probe_is_return(tp)) ret = register_kretprobe(&tp->rp); else @@ -987,23 +992,24 @@ static int probe_event_enable(struct ftrace_event_call *call) { struct trace_probe *tp = (struct trace_probe *)call->data; - if (probe_is_return(tp)) { - tp->rp.handler = kretprobe_trace_func; + tp->flags |= TP_FLAG_TRACE; + if (probe_is_return(tp)) return enable_kretprobe(&tp->rp); - } else { - tp->rp.kp.pre_handler = kprobe_trace_func; + else return enable_kprobe(&tp->rp.kp); - } } static void probe_event_disable(struct ftrace_event_call *call) { struct trace_probe *tp = (struct trace_probe *)call->data; - if (probe_is_return(tp)) - disable_kretprobe(&tp->rp); - else - disable_kprobe(&tp->rp.kp); + tp->flags &= ~TP_FLAG_TRACE; + if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { + if (probe_is_return(tp)) + disable_kretprobe(&tp->rp); + else + disable_kprobe(&tp->rp.kp); + } } static int probe_event_raw_init(struct ftrace_event_call *event_call) @@ -1212,22 +1218,57 @@ static int probe_profile_enable(struct ftrace_event_call *call) if (atomic_inc_return(&call->profile_count)) return 0; - if (probe_is_return(tp)) { - tp->rp.handler = kretprobe_profile_func; + tp->flags |= TP_FLAG_PROFILE; + if (probe_is_return(tp)) return enable_kretprobe(&tp->rp); - } else { - tp->rp.kp.pre_handler = kprobe_profile_func; + else return enable_kprobe(&tp->rp.kp); - } } static void probe_profile_disable(struct ftrace_event_call *call) { + struct trace_probe *tp = (struct trace_probe *)call->data; + if (atomic_add_negative(-1, &call->profile_count)) - probe_event_disable(call); + tp->flags &= ~TP_FLAG_PROFILE; + + if (!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE))) { + if (probe_is_return(tp)) + disable_kretprobe(&tp->rp); + else + disable_kprobe(&tp->rp.kp); + } } +#endif /* CONFIG_EVENT_PROFILE */ + + +static __kprobes +int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); + if (tp->flags & TP_FLAG_TRACE) + kprobe_trace_func(kp, regs); +#ifdef CONFIG_EVENT_PROFILE + if (tp->flags & TP_FLAG_PROFILE) + kprobe_profile_func(kp, regs); #endif /* CONFIG_EVENT_PROFILE */ + return 0; /* We don't tweek kernel, so just return 0 */ +} + +static __kprobes +int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); + + if (tp->flags & TP_FLAG_TRACE) + kretprobe_trace_func(ri, regs); +#ifdef CONFIG_EVENT_PROFILE + if (tp->flags & TP_FLAG_PROFILE) + kretprobe_profile_func(ri, regs); +#endif /* CONFIG_EVENT_PROFILE */ + return 0; /* We don't tweek kernel, so just return 0 */ +} static int register_probe_event(struct trace_probe *tp) { -- cgit v0.10.2 From 74ebb63e7cd25f6fb02a45fc2ea7735bce1217c9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 14 Sep 2009 16:49:28 -0400 Subject: tracing/kprobes: Fix profiling alignment for perf_counter buffer Fix *probe_profile_func() to align buffer size, since perf_counter requires its buffer entries to be 8 bytes aligned. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090914204928.18779.60029.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 70b632c..d8db935 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1149,18 +1149,23 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - int size, i, pc; + int size, __size, i, pc; unsigned long irq_flags; local_save_flags(irq_flags); pc = preempt_count(); - size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); + size = ALIGN(__size + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); do { char raw_data[size]; struct trace_entry *ent; - + /* + * Zero dead bytes from alignment to avoid stack leak + * to userspace + */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kprobe_trace_entry *)raw_data; ent = &entry->ent; @@ -1183,13 +1188,15 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - int size, i, pc; + int size, __size, i, pc; unsigned long irq_flags; local_save_flags(irq_flags); pc = preempt_count(); - size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); + size = ALIGN(__size + sizeof(u32), sizeof(u64)); + size -= sizeof(u32); do { char raw_data[size]; -- cgit v0.10.2 From 5a0d9050db4d1147722b42afef9011251b2651ee Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 14 Sep 2009 16:49:37 -0400 Subject: tracing/kprobes: Disable kprobe events by default after creation Disable newly created kprobe events by default, not to disturb another user using ftrace. "Disturb" means when someone is using ftrace and another user tries to use perf-tools, (in near future) if he defines new kprobe event via perf-tools, then new events will mess up the frace buffer. Fix this to allow proper and transparent kprobes events concurrent usage between ftrace users and perf users. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090914204937.18779.59422.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 6521681..9b8f7c6 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -122,8 +122,15 @@ print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->ip, REC->dfd, R echo > /sys/kernel/debug/tracing/kprobe_events - This clears all probe points. and you can see the traced information via -/sys/kernel/debug/tracing/trace. + This clears all probe points. + + Right after definition, each event is disabled by default. For tracing these +events, you need to enable it. + + echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable + echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable + + And you can see the traced information via /sys/kernel/debug/tracing/trace. cat /sys/kernel/debug/tracing/trace # tracer: nop diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index d8db935..f6821f1 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -383,7 +383,7 @@ static int register_trace_probe(struct trace_probe *tp) goto end; } - tp->flags = TP_FLAG_TRACE; + tp->rp.kp.flags |= KPROBE_FLAG_DISABLED; if (probe_is_return(tp)) ret = register_kretprobe(&tp->rp); else @@ -1298,7 +1298,7 @@ static int register_probe_event(struct trace_probe *tp) call->id = register_ftrace_event(&tp->event); if (!call->id) return -ENODEV; - call->enabled = 1; + call->enabled = 0; call->regfunc = probe_event_enable; call->unregfunc = probe_event_disable; -- cgit v0.10.2 From 1f0ab40976460bc4673fa204ce917a725185d8f2 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Tue, 15 Sep 2009 10:43:07 +0530 Subject: kprobes: Prevent re-registration of the same kprobe Prevent re-registration of the same kprobe. This situation, though unlikely, needs to be flagged since it can lead to a system crash if it's not handled. The core change itself is small, but the helper routine needed to be moved around a bit; hence the diffstat. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Masami Hiramatsu Cc: Jim Keniston Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <20090915051307.GB26458@in.ibm.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 00d01b0..b946761 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -676,6 +676,40 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) return (kprobe_opcode_t *)(((char *)addr) + p->offset); } +/* Check passed kprobe is valid and return kprobe in kprobe_table. */ +static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) +{ + struct kprobe *old_p, *list_p; + + old_p = get_kprobe(p->addr); + if (unlikely(!old_p)) + return NULL; + + if (p != old_p) { + list_for_each_entry_rcu(list_p, &old_p->list, list) + if (list_p == p) + /* kprobe p is a valid probe */ + goto valid; + return NULL; + } +valid: + return old_p; +} + +/* Return error if the kprobe is being re-registered */ +static inline int check_kprobe_rereg(struct kprobe *p) +{ + int ret = 0; + struct kprobe *old_p; + + mutex_lock(&kprobe_mutex); + old_p = __get_valid_kprobe(p); + if (old_p) + ret = -EINVAL; + mutex_unlock(&kprobe_mutex); + return ret; +} + int __kprobes register_kprobe(struct kprobe *p) { int ret = 0; @@ -688,6 +722,10 @@ int __kprobes register_kprobe(struct kprobe *p) return -EINVAL; p->addr = addr; + ret = check_kprobe_rereg(p); + if (ret) + return ret; + preempt_disable(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr)) { @@ -757,26 +795,6 @@ out: } EXPORT_SYMBOL_GPL(register_kprobe); -/* Check passed kprobe is valid and return kprobe in kprobe_table. */ -static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) -{ - struct kprobe *old_p, *list_p; - - old_p = get_kprobe(p->addr); - if (unlikely(!old_p)) - return NULL; - - if (p != old_p) { - list_for_each_entry_rcu(list_p, &old_p->list, list) - if (list_p == p) - /* kprobe p is a valid probe */ - goto valid; - return NULL; - } -valid: - return old_p; -} - /* * Unregister a kprobe without a scheduler synchronization. */ -- cgit v0.10.2 From d01d4827858cdc2e1c437c87ab65ec0a00fd40f8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 21 Sep 2009 11:06:27 +0200 Subject: sched: Always show Cpus_allowed field in /proc//status The Cpus_allowed fields in /proc//status is currently only shown in case of CONFIG_CPUSETS. However their contents are also useful for the !CONFIG_CPUSETS case. So change the current behaviour and always show these fields. Signed-off-by: Heiko Carstens Cc: Andrew Morton Cc: Oleg Nesterov Cc: Peter Zijlstra LKML-Reference: <20090921090627.GD4649@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar diff --git a/fs/proc/array.c b/fs/proc/array.c index 725a650..762aea9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -321,6 +321,16 @@ static inline void task_context_switch_counts(struct seq_file *m, p->nivcsw); } +static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) +{ + seq_printf(m, "Cpus_allowed:\t"); + seq_cpumask(m, &task->cpus_allowed); + seq_printf(m, "\n"); + seq_printf(m, "Cpus_allowed_list:\t"); + seq_cpumask_list(m, &task->cpus_allowed); + seq_printf(m, "\n"); +} + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { @@ -335,6 +345,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, } task_sig(m, task); task_cap(m, task); + task_cpus_allowed(m, task); cpuset_task_status_allowed(m, task); #if defined(CONFIG_S390) task_show_regs(m, task); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 7e75a41..b81f7f0 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2499,15 +2499,9 @@ const struct file_operations proc_cpuset_operations = { }; #endif /* CONFIG_PROC_PID_CPUSET */ -/* Display task cpus_allowed, mems_allowed in /proc//status file. */ +/* Display task mems_allowed in /proc//status file. */ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) { - seq_printf(m, "Cpus_allowed:\t"); - seq_cpumask(m, &task->cpus_allowed); - seq_printf(m, "\n"); - seq_printf(m, "Cpus_allowed_list:\t"); - seq_cpumask_list(m, &task->cpus_allowed); - seq_printf(m, "\n"); seq_printf(m, "Mems_allowed:\t"); seq_nodemask(m, &task->mems_allowed); seq_printf(m, "\n"); -- cgit v0.10.2 From 979f693def9084a452846365dfde5dcb28366333 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 14:44:11 +0200 Subject: ratelimit: Use per ratelimit context locking I'd like to use printk_ratelimit() in atomic context, but that's not possible right now due to the spinlock usage this commit introduced more than a year ago: 717115e: printk ratelimiting rewrite As a first step push the lock into the ratelimit state structure. This allows us to deal with locking failures to be considered as an event related to that state being too busy. Also clean up the code a bit (without changing functionality): - tidy up the definitions - clean up the code flow This also shrinks the code a tiny bit: text data bss dec hex filename 264 0 4 268 10c ratelimit.o.before 255 0 0 255 ff ratelimit.o.after ( Whole-kernel data size got a bit larger, because we have two ratelimit-state data structures right now. ) Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. Miller LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 00044b8..187bc16 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -1,20 +1,30 @@ #ifndef _LINUX_RATELIMIT_H #define _LINUX_RATELIMIT_H + #include +#include -#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) -#define DEFAULT_RATELIMIT_BURST 10 +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 struct ratelimit_state { - int interval; - int burst; - int printed; - int missed; - unsigned long begin; + spinlock_t lock; /* protect the state */ + + int interval; + int burst; + int printed; + int missed; + unsigned long begin; }; -#define DEFINE_RATELIMIT_STATE(name, interval, burst) \ - struct ratelimit_state name = {interval, burst,} +#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \ + \ + struct ratelimit_state name = { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + } extern int __ratelimit(struct ratelimit_state *rs); -#endif + +#endif /* _LINUX_RATELIMIT_H */ diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 26187ed..0e2c28e 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -7,15 +7,12 @@ * parameter. Now every user can use their own standalone ratelimit_state. * * This file is released under the GPLv2. - * */ #include #include #include -static DEFINE_SPINLOCK(ratelimit_lock); - /* * __ratelimit - rate limiting * @rs: ratelimit_state data @@ -26,11 +23,12 @@ static DEFINE_SPINLOCK(ratelimit_lock); int __ratelimit(struct ratelimit_state *rs) { unsigned long flags; + int ret; if (!rs->interval) return 1; - spin_lock_irqsave(&ratelimit_lock, flags); + spin_lock_irqsave(&rs->lock, flags); if (!rs->begin) rs->begin = jiffies; @@ -38,20 +36,19 @@ int __ratelimit(struct ratelimit_state *rs) if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", __func__, rs->missed); - rs->begin = 0; + rs->begin = 0; rs->printed = 0; - rs->missed = 0; + rs->missed = 0; } - if (rs->burst && rs->burst > rs->printed) - goto print; - - rs->missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + if (rs->burst && rs->burst > rs->printed) { + rs->printed++; + ret = 1; + } else { + rs->missed++; + ret = 0; + } + spin_unlock_irqrestore(&rs->lock, flags); -print: - rs->printed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 1; + return ret; } EXPORT_SYMBOL(__ratelimit); -- cgit v0.10.2 From edaac8e3167501cda336231d00611bf59c164346 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 14:44:11 +0200 Subject: ratelimit: Fix/allow use in atomic contexts I'd like to use printk_ratelimit() in NMI context, but it's not robust right now due to spinlock usage in lib/ratelimit.c. If an NMI is unlucky enough to hit just that spot we might lock up trying to take the spinlock again. Fix that by using a trylock variant. If we contend on that lock we can genuinely skip the message because the state is just being accessed by another CPU (or by this CPU). ( We could use atomics for the suppressed messages field, but i doubt it matters in practice and it makes the code heavier. ) Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. Miller LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 0e2c28e..69bfcac 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -28,7 +28,15 @@ int __ratelimit(struct ratelimit_state *rs) if (!rs->interval) return 1; - spin_lock_irqsave(&rs->lock, flags); + /* + * If we contend on this state's lock then almost + * by definition we are too busy to print a message, + * in addition to the one that will be printed by + * the entity that is holding the lock already: + */ + if (!spin_trylock_irqsave(&rs->lock, flags)) + return 1; + if (!rs->begin) rs->begin = jiffies; -- cgit v0.10.2 From 3fff4c42bd0a89869a0eb1e7874cc06ffa4aa0f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 22 Sep 2009 16:18:09 +0200 Subject: printk: Remove ratelimit.h from kernel.h Decouple kernel.h from ratelimit.h: the global declaration of printk's ratelimit_state is not needed, and it leads to messy circular dependencies due to ratelimit.h's (new) adding of a spinlock_types.h include. Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: David S. Miller LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2b5b1e0..3305f33 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -241,7 +240,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern struct ratelimit_state printk_ratelimit_state; extern int printk_ratelimit(void); extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); diff --git a/include/linux/net.h b/include/linux/net.h index 9040a10..df20f68 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -358,6 +358,7 @@ static const struct proto_ops name##_ops = { \ #ifdef CONFIG_SYSCTL #include +#include extern struct ratelimit_state net_ratelimit_state; #endif diff --git a/kernel/printk.c b/kernel/printk.c index 602033a..b997c89 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -33,6 +33,7 @@ #include #include #include +#include #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a631ba..6c37048 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -155,6 +156,8 @@ extern int no_unaligned_warning; extern int unaligned_dump_stack; #endif +extern struct ratelimit_state printk_ratelimit_state; + #ifdef CONFIG_RT_MUTEXES extern int max_lock_depth; #endif diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 69bfcac..5551731 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -9,7 +9,7 @@ * This file is released under the GPLv2. */ -#include +#include #include #include diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7db1de0..887c03c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -10,7 +10,9 @@ #include #include #include +#include #include + #include #include diff --git a/net/core/utils.c b/net/core/utils.c index 83221ae..8382502 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include -- cgit v0.10.2 From 96a2c464de07d7c72988db851c029b204fc59108 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 1 Aug 2009 01:34:24 +0200 Subject: tracing/bkl: Add bkl ftrace events Add two events lock_kernel and unlock_kernel() to trace the bkl uses. This opens the door for userspace tools to perform statistics about the callsites that use it, dependencies with other locks (by pairing the trace with lock events), use with recursivity and so on... The {__reacquire,release}_kernel_lock() events are not traced because these are called from schedule, thus the sched events are sufficient to trace them. Example of a trace: hald-addon-stor-4152 [000] 165.875501: unlock_kernel: depth: 0, fs/block_dev.c:1358 __blkdev_put() hald-addon-stor-4152 [000] 167.832974: lock_kernel: depth: 0, fs/block_dev.c:1167 __blkdev_get() How to get the callsites that acquire it recursively: cd /debug/tracing/events/bkl echo "lock_depth > 0" > filter firefox-4951 [001] 206.276967: unlock_kernel: depth: 1, fs/reiserfs/super.c:575 reiserfs_dirty_inode() You can also filter by file and/or line. v2: Use of FILTER_PTR_STRING attribute for files and lines fields to make them traceable. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index 813be59..d48cc77 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -3,6 +3,7 @@ #ifdef CONFIG_LOCK_KERNEL #include +#include #define kernel_locked() (current->lock_depth >= 0) @@ -24,8 +25,18 @@ static inline int reacquire_kernel_lock(struct task_struct *task) return 0; } -extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); -extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); +extern void __lockfunc _lock_kernel(void) __acquires(kernel_lock); +extern void __lockfunc _unlock_kernel(void) __releases(kernel_lock); + +#define lock_kernel() { \ + trace_lock_kernel(__func__, __FILE__, __LINE__); \ + _lock_kernel(); \ +} + +#define unlock_kernel() { \ + trace_unlock_kernel(__func__, __FILE__, __LINE__); \ + _unlock_kernel(); \ +} /* * Various legacy drivers don't really need the BKL in a specific @@ -41,8 +52,8 @@ static inline void cycle_kernel_lock(void) #else -#define lock_kernel() do { } while(0) -#define unlock_kernel() do { } while(0) +#define lock_kernel() trace_lock_kernel(__func__, __FILE__, __LINE__); +#define unlock_kernel() trace_unlock_kernel(__func__, __FILE__, __LINE__); #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h new file mode 100644 index 0000000..8abd620 --- /dev/null +++ b/include/trace/events/bkl.h @@ -0,0 +1,61 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM bkl + +#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BKL_H + +#include + +TRACE_EVENT(lock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field( int, lock_depth ) + __field_ext( const char *, func, FILTER_PTR_STRING ) + __field_ext( const char *, file, FILTER_PTR_STRING ) + __field( int, line ) + ), + + TP_fast_assign( + /* We want to record the lock_depth after lock is acquired */ + __entry->lock_depth = current->lock_depth + 1; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + __entry->file, __entry->line, __entry->func) +); + +TRACE_EVENT(unlock_kernel, + + TP_PROTO(const char *func, const char *file, int line), + + TP_ARGS(func, file, line), + + TP_STRUCT__entry( + __field(int, lock_depth) + __field(const char *, func) + __field(const char *, file) + __field(int, line) + ), + + TP_fast_assign( + __entry->lock_depth = current->lock_depth; + __entry->func = func; + __entry->file = file; + __entry->line = line; + ), + + TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + __entry->file, __entry->line, __entry->func) +); + +#endif /* _TRACE_BKL_H */ + +/* This part must be outside protection */ +#include diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 39f1029..5c10b2e 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -5,10 +5,11 @@ * relegated to obsolescence, but used by various less * important (or lazy) subsystems. */ -#include #include #include #include +#define CREATE_TRACE_POINTS +#include /* * The 'big kernel lock' @@ -113,7 +114,7 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc lock_kernel(void) +void __lockfunc _lock_kernel(void) { int depth = current->lock_depth+1; if (likely(!depth)) @@ -121,13 +122,13 @@ void __lockfunc lock_kernel(void) current->lock_depth = depth; } -void __lockfunc unlock_kernel(void) +void __lockfunc _unlock_kernel(void) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); } -EXPORT_SYMBOL(lock_kernel); -EXPORT_SYMBOL(unlock_kernel); +EXPORT_SYMBOL(_lock_kernel); +EXPORT_SYMBOL(_unlock_kernel); -- cgit v0.10.2 From 737f453fd115ea0c9642ed6b30e37e296a4e3ed7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 1 Aug 2009 03:42:44 +0200 Subject: tracing/filters: Cleanup useless headers Cleanup remaining headers inclusion that were only useful when the filter framework and its tracing related filesystem user interface weren't yet separated. v2: Keep module.h, needed for EXPORT_SYMBOL_GPL Signed-off-by: Frederic Weisbecker Cc: Tom Zanussi Cc: Steven Rostedt Cc: Li Zefan diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 2324578..189663d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -18,8 +18,6 @@ * Copyright (C) 2009 Tom Zanussi */ -#include -#include #include #include #include -- cgit v0.10.2 From f3f3f0092477d0165f3f1bf0fd518550b2abd097 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Sep 2009 15:27:41 +0200 Subject: tracing/event: Cleanup the useless dentry variable Cleanup the useless dentry variable while creating a kernel event set of files. trace_create_file() warns if it fails to create the file anyway, and we don't store the dentry anywhere. v2: Fix a small conflict in kernel/trace/trace_events.c Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 56c260b..8c91b7c 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -898,9 +898,9 @@ event_subsystem_dir(const char *name, struct dentry *d_events) "'%s/filter' entry\n", name); } - entry = trace_create_file("enable", 0644, system->entry, - (void *)system->name, - &ftrace_system_enable_fops); + trace_create_file("enable", 0644, system->entry, + (void *)system->name, + &ftrace_system_enable_fops); return system->entry; } @@ -912,7 +912,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, const struct file_operations *filter, const struct file_operations *format) { - struct dentry *entry; int ret; /* @@ -930,12 +929,12 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, } if (call->regfunc) - entry = trace_create_file("enable", 0644, call->dir, call, - enable); + trace_create_file("enable", 0644, call->dir, call, + enable); if (call->id && call->profile_enable) - entry = trace_create_file("id", 0444, call->dir, call, - id); + trace_create_file("id", 0444, call->dir, call, + id); if (call->define_fields) { ret = call->define_fields(call); @@ -944,16 +943,16 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, " events/%s\n", call->name); return ret; } - entry = trace_create_file("filter", 0644, call->dir, call, - filter); + trace_create_file("filter", 0644, call->dir, call, + filter); } /* A trace may not want to export its format */ if (!call->show_format) return 0; - entry = trace_create_file("format", 0444, call->dir, call, - format); + trace_create_file("format", 0444, call->dir, call, + format); return 0; } -- cgit v0.10.2 From cbfeb267cb0ff632dbc8ff02685012bee2e87434 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 24 Sep 2009 18:01:51 +0200 Subject: perf annotate: Add the cmp_null function and make use of it This function exists in builtin-report.c but not in builtin-annotate.c Functions that use cmp_null are shorter and clearer. Synchronizing functions between these two files will also make it easier to potential share code in the future. Signed-off-by: John Kacur Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1ec7416..a330873 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -82,6 +82,16 @@ struct sort_entry { size_t (*print)(FILE *fp, struct hist_entry *); }; +static int64_t cmp_null(void *l, void *r) +{ + if (!l && !r) + return 0; + else if (!l) + return -1; + else + return 1; +} + /* --sort pid */ static int64_t @@ -116,14 +126,8 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) char *comm_l = left->thread->comm; char *comm_r = right->thread->comm; - if (!comm_l || !comm_r) { - if (!comm_l && !comm_r) - return 0; - else if (!comm_l) - return -1; - else - return 1; - } + if (!comm_l || !comm_r) + return cmp_null(comm_l, comm_r); return strcmp(comm_l, comm_r); } @@ -149,14 +153,8 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) struct dso *dso_l = left->dso; struct dso *dso_r = right->dso; - if (!dso_l || !dso_r) { - if (!dso_l && !dso_r) - return 0; - else if (!dso_l) - return -1; - else - return 1; - } + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); return strcmp(dso_l->name, dso_r->name); } -- cgit v0.10.2 From 8b40f521cf1c9750eab0c04da9075e7484675e9c Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 24 Sep 2009 18:02:18 +0200 Subject: perf tools: Protect header files with a consistent style There was a colorful mix of header guards - standardize them. Signed-off-by: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 6f8ea9d..f26172c 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -1,5 +1,5 @@ -#ifndef CACHE_H -#define CACHE_H +#ifndef __PERF_CACHE_H +#define __PERF_CACHE_H #include "util.h" #include "strbuf.h" @@ -117,4 +117,4 @@ extern char *perf_pathdup(const char *fmt, ...) extern size_t strlcpy(char *dest, const char *src, size_t size); -#endif /* CACHE_H */ +#endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 43cf3ea..ad4626d 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -58,4 +58,4 @@ static inline u64 cumul_hits(struct callchain_node *node) int register_callchain_param(struct callchain_param *param); void append_chain(struct callchain_node *root, struct ip_callchain *chain, struct symbol **syms); -#endif +#endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 58d5975..24e8809 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -1,5 +1,5 @@ -#ifndef COLOR_H -#define COLOR_H +#ifndef __PERF_COLOR_H +#define __PERF_COLOR_H /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ #define COLOR_MAXLEN 24 @@ -39,4 +39,4 @@ int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *bu int percent_color_fprintf(FILE *fp, const char *fmt, double percent); const char *get_percent_color(double percent); -#endif /* COLOR_H */ +#endif /* __PERF_COLOR_H */ diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 437eea5..02d1fa1 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -1,4 +1,6 @@ /* For debugging general purposes */ +#ifndef __PERF_DEBUG_H +#define __PERF_DEBUG_H extern int verbose; extern int dump_trace; @@ -6,3 +8,5 @@ extern int dump_trace; int eprintf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); void trace_event(event_t *event); + +#endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 2c9c26d6..c31a5da 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -1,5 +1,6 @@ #ifndef __PERF_RECORD_H #define __PERF_RECORD_H + #include "../perf.h" #include "util.h" #include @@ -101,4 +102,4 @@ struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); -#endif +#endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h index effe25e..31647ac 100644 --- a/tools/perf/util/exec_cmd.h +++ b/tools/perf/util/exec_cmd.h @@ -1,5 +1,5 @@ -#ifndef PERF_EXEC_CMD_H -#define PERF_EXEC_CMD_H +#ifndef __PERF_EXEC_CMD_H +#define __PERF_EXEC_CMD_H extern void perf_set_argv_exec_path(const char *exec_path); extern const char *perf_extract_argv0_path(const char *path); @@ -10,4 +10,4 @@ extern int execv_perf_cmd(const char **argv); /* NULL terminated */ extern int execl_perf_cmd(const char *cmd, ...); extern const char *system_path(const char *path); -#endif /* PERF_EXEC_CMD_H */ +#endif /* __PERF_EXEC_CMD_H */ diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index a0761bc..a2916b6 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,5 +1,5 @@ -#ifndef _PERF_HEADER_H -#define _PERF_HEADER_H +#ifndef __PERF_HEADER_H +#define __PERF_HEADER_H #include "../../../include/linux/perf_event.h" #include @@ -44,4 +44,4 @@ perf_header__find_attr(u64 id, struct perf_header *header); struct perf_header *perf_header__new(void); -#endif /* _PERF_HEADER_H */ +#endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h index 7128783..7f5c6de 100644 --- a/tools/perf/util/help.h +++ b/tools/perf/util/help.h @@ -1,5 +1,5 @@ -#ifndef HELP_H -#define HELP_H +#ifndef __PERF_HELP_H +#define __PERF_HELP_H struct cmdnames { size_t alloc; @@ -26,4 +26,4 @@ int is_in_cmdlist(struct cmdnames *c, const char *s); void list_commands(const char *title, struct cmdnames *main_cmds, struct cmdnames *other_cmds); -#endif /* HELP_H */ +#endif /* __PERF_HELP_H */ diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h index 0173abe..b0fcb6d 100644 --- a/tools/perf/util/levenshtein.h +++ b/tools/perf/util/levenshtein.h @@ -1,8 +1,8 @@ -#ifndef LEVENSHTEIN_H -#define LEVENSHTEIN_H +#ifndef __PERF_LEVENSHTEIN_H +#define __PERF_LEVENSHTEIN_H int levenshtein(const char *string1, const char *string2, int swap_penalty, int substition_penalty, int insertion_penalty, int deletion_penalty); -#endif +#endif /* __PERF_LEVENSHTEIN_H */ diff --git a/tools/perf/util/module.h b/tools/perf/util/module.h index 8a592ef..098e041 100644 --- a/tools/perf/util/module.h +++ b/tools/perf/util/module.h @@ -1,5 +1,5 @@ -#ifndef _PERF_MODULE_ -#define _PERF_MODULE_ 1 +#ifndef __PERF_MODULE_ +#define __PERF_MODULE_ 1 #include #include "../types.h" @@ -50,4 +50,4 @@ size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp); struct module *mod_dso__find_module(struct mod_dso *self, const char *name); int mod_dso__load_modules(struct mod_dso *dso); -#endif /* _PERF_MODULE_ */ +#endif /* __PERF_MODULE_ */ diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 30c6081..8626a43 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -1,5 +1,5 @@ -#ifndef _PARSE_EVENTS_H -#define _PARSE_EVENTS_H +#ifndef __PERF_PARSE_EVENTS_H +#define __PERF_PARSE_EVENTS_H /* * Parse symbolic events/counts passed in as options: */ @@ -31,4 +31,4 @@ extern char debugfs_path[]; extern int valid_debugfs_mount(const char *debugfs); -#endif /* _PARSE_EVENTS_H */ +#endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 2ee248f..948805a 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -1,5 +1,5 @@ -#ifndef PARSE_OPTIONS_H -#define PARSE_OPTIONS_H +#ifndef __PERF_PARSE_OPTIONS_H +#define __PERF_PARSE_OPTIONS_H enum parse_opt_type { /* special types */ @@ -174,4 +174,4 @@ extern int parse_opt_verbosity_cb(const struct option *, const char *, int); extern const char *parse_options_fix_filename(const char *prefix, const char *file); -#endif +#endif /* __PERF_PARSE_OPTIONS_H */ diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index a5454a1..b6a0197 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -1,5 +1,5 @@ -#ifndef QUOTE_H -#define QUOTE_H +#ifndef __PERF_QUOTE_H +#define __PERF_QUOTE_H #include #include @@ -65,4 +65,4 @@ extern void perl_quote_print(FILE *stream, const char *src); extern void python_quote_print(FILE *stream, const char *src); extern void tcl_quote_print(FILE *stream, const char *src); -#endif +#endif /* __PERF_QUOTE_H */ diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h index cc1837d..d790287 100644 --- a/tools/perf/util/run-command.h +++ b/tools/perf/util/run-command.h @@ -1,5 +1,5 @@ -#ifndef RUN_COMMAND_H -#define RUN_COMMAND_H +#ifndef __PERF_RUN_COMMAND_H +#define __PERF_RUN_COMMAND_H enum { ERR_RUN_COMMAND_FORK = 10000, @@ -85,4 +85,4 @@ struct async { int start_async(struct async *async); int finish_async(struct async *async); -#endif +#endif /* __PERF_RUN_COMMAND_H */ diff --git a/tools/perf/util/sigchain.h b/tools/perf/util/sigchain.h index 618083bc..1a53c11 100644 --- a/tools/perf/util/sigchain.h +++ b/tools/perf/util/sigchain.h @@ -1,5 +1,5 @@ -#ifndef SIGCHAIN_H -#define SIGCHAIN_H +#ifndef __PERF_SIGCHAIN_H +#define __PERF_SIGCHAIN_H typedef void (*sigchain_fun)(int); @@ -8,4 +8,4 @@ int sigchain_pop(int sig); void sigchain_push_common(sigchain_fun f); -#endif /* SIGCHAIN_H */ +#endif /* __PERF_SIGCHAIN_H */ diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index d2aa86c..a3d121d 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -1,5 +1,5 @@ -#ifndef STRBUF_H -#define STRBUF_H +#ifndef __PERF_STRBUF_H +#define __PERF_STRBUF_H /* * Strbuf's can be use in many ways: as a byte array, or to store arbitrary @@ -134,4 +134,4 @@ extern int launch_editor(const char *path, struct strbuf *buffer, const char *co extern int strbuf_branchname(struct strbuf *sb, const char *name); extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); -#endif /* STRBUF_H */ +#endif /* __PERF_STRBUF_H */ diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index bf39dfa..15c8274 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -1,5 +1,5 @@ -#ifndef _PERF_STRING_H_ -#define _PERF_STRING_H_ +#ifndef __PERF_STRING_H_ +#define __PERF_STRING_H_ #include "types.h" @@ -8,4 +8,4 @@ int hex2u64(const char *ptr, u64 *val); #define _STR(x) #x #define STR(x) _STR(x) -#endif +#endif /* __PERF_STRING_H */ diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 921818e..cb46593 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -1,5 +1,5 @@ -#ifndef STRLIST_H_ -#define STRLIST_H_ +#ifndef __PERF_STRLIST_H +#define __PERF_STRLIST_H #include #include @@ -36,4 +36,4 @@ static inline unsigned int strlist__nr_entries(const struct strlist *self) } int strlist__parse_list(struct strlist *self, const char *s); -#endif /* STRLIST_H_ */ +#endif /* __PERF_STRLIST_H */ diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h index cd93195..e078198 100644 --- a/tools/perf/util/svghelper.h +++ b/tools/perf/util/svghelper.h @@ -1,5 +1,5 @@ -#ifndef _INCLUDE_GUARD_SVG_HELPER_ -#define _INCLUDE_GUARD_SVG_HELPER_ +#ifndef __PERF_SVGHELPER_H +#define __PERF_SVGHELPER_H #include "types.h" @@ -25,4 +25,4 @@ extern void svg_close(void); extern int svg_page_width; -#endif +#endif /* __PERF_SVGHELPER_H */ diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 6e84907..ee164f6 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -1,5 +1,5 @@ -#ifndef _PERF_SYMBOL_ -#define _PERF_SYMBOL_ 1 +#ifndef __PERF_SYMBOL +#define __PERF_SYMBOL 1 #include #include "types.h" @@ -89,4 +89,4 @@ extern struct dso *vdso; extern struct dso *hypervisor_dso; extern const char *vmlinux_name; extern int modules; -#endif /* _PERF_SYMBOL_ */ +#endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 32aea3c..693ed1e 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -1,3 +1,6 @@ +#ifndef __PERF_THREAD_H +#define __PERF_THREAD_H + #include #include #include @@ -20,3 +23,5 @@ void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); struct map *thread__find_map(struct thread *self, u64 ip); size_t threads__fprintf(FILE *fp, struct rb_root *threads); + +#endif /* __PERF_THREAD_H */ diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 693f815..162c3e6 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -1,5 +1,5 @@ -#ifndef _TRACE_EVENTS_H -#define _TRACE_EVENTS_H +#ifndef __PERF_TRACE_EVENTS_H +#define __PERF_TRACE_EVENTS_H #include "parse-events.h" @@ -242,4 +242,4 @@ void *raw_field_ptr(struct event *event, const char *name, void *data); void read_tracing_data(struct perf_event_attr *pattrs, int nb_events); -#endif /* _TRACE_EVENTS_H */ +#endif /* __PERF_TRACE_EVENTS_H */ diff --git a/tools/perf/util/types.h b/tools/perf/util/types.h index 5e75f90..7d6b833 100644 --- a/tools/perf/util/types.h +++ b/tools/perf/util/types.h @@ -1,5 +1,5 @@ -#ifndef _PERF_TYPES_H -#define _PERF_TYPES_H +#ifndef __PERF_TYPES_H +#define __PERF_TYPES_H /* * We define u64 as unsigned long long for every architecture @@ -14,4 +14,4 @@ typedef signed short s16; typedef unsigned char u8; typedef signed char s8; -#endif /* _PERF_TYPES_H */ +#endif /* __PERF_TYPES_H */ diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h index cadf8cf..2fa967e 100644 --- a/tools/perf/util/values.h +++ b/tools/perf/util/values.h @@ -1,5 +1,5 @@ -#ifndef _PERF_VALUES_H -#define _PERF_VALUES_H +#ifndef __PERF_VALUES_H +#define __PERF_VALUES_H #include "types.h" @@ -24,4 +24,4 @@ void perf_read_values_add_value(struct perf_read_values *values, void perf_read_values_display(FILE *fp, struct perf_read_values *values, int raw); -#endif /* _PERF_VALUES_H */ +#endif /* __PERF_VALUES_H */ -- cgit v0.10.2 From dd68ada2d417e57b848822a1407b5317a54136c5 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 24 Sep 2009 18:02:49 +0200 Subject: perf tools: Create util/sort.and use it Create util/sort.[ch] and move common functionality for builtin-report.c and builtin-annotate.c there, and make use of it. Signed-off-by: John Kacur LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index b5f1953..0a9e5ae 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -339,6 +339,7 @@ LIB_H += util/symbol.h LIB_H += util/module.h LIB_H += util/color.h LIB_H += util/values.h +LIB_H += util/sort.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -374,6 +375,7 @@ LIB_OBJS += util/trace-event-parse.o LIB_OBJS += util/trace-event-read.o LIB_OBJS += util/trace-event-info.o LIB_OBJS += util/svghelper.o +LIB_OBJS += util/sort.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index a330873..059c565 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -22,12 +22,10 @@ #include "util/parse-options.h" #include "util/parse-events.h" #include "util/thread.h" +#include "util/sort.h" static char const *input_name = "perf.data"; -static char default_sort_order[] = "comm,symbol"; -static char *sort_order = default_sort_order; - static int force; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -55,207 +53,6 @@ struct sym_ext { static struct rb_root hist; -struct hist_entry { - struct rb_node rb_node; - - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - u64 ip; - char level; - - uint32_t count; -}; - -/* - * configurable sorting bits - */ - -struct sort_entry { - struct list_head list; - - const char *header; - - int64_t (*cmp)(struct hist_entry *, struct hist_entry *); - int64_t (*collapse)(struct hist_entry *, struct hist_entry *); - size_t (*print)(FILE *fp, struct hist_entry *); -}; - -static int64_t cmp_null(void *l, void *r) -{ - if (!l && !r) - return 0; - else if (!l) - return -1; - else - return 1; -} - -/* --sort pid */ - -static int64_t -sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static size_t -sort__thread_print(FILE *fp, struct hist_entry *self) -{ - return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); -} - -static struct sort_entry sort_thread = { - .header = " Command: Pid", - .cmp = sort__thread_cmp, - .print = sort__thread_print, -}; - -/* --sort comm */ - -static int64_t -sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static int64_t -sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) -{ - char *comm_l = left->thread->comm; - char *comm_r = right->thread->comm; - - if (!comm_l || !comm_r) - return cmp_null(comm_l, comm_r); - - return strcmp(comm_l, comm_r); -} - -static size_t -sort__comm_print(FILE *fp, struct hist_entry *self) -{ - return fprintf(fp, "%16s", self->thread->comm); -} - -static struct sort_entry sort_comm = { - .header = " Command", - .cmp = sort__comm_cmp, - .collapse = sort__comm_collapse, - .print = sort__comm_print, -}; - -/* --sort dso */ - -static int64_t -sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct dso *dso_l = left->dso; - struct dso *dso_r = right->dso; - - if (!dso_l || !dso_r) - return cmp_null(dso_l, dso_r); - - return strcmp(dso_l->name, dso_r->name); -} - -static size_t -sort__dso_print(FILE *fp, struct hist_entry *self) -{ - if (self->dso) - return fprintf(fp, "%-25s", self->dso->name); - - return fprintf(fp, "%016llx ", (u64)self->ip); -} - -static struct sort_entry sort_dso = { - .header = "Shared Object ", - .cmp = sort__dso_cmp, - .print = sort__dso_print, -}; - -/* --sort symbol */ - -static int64_t -sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) -{ - u64 ip_l, ip_r; - - if (left->sym == right->sym) - return 0; - - ip_l = left->sym ? left->sym->start : left->ip; - ip_r = right->sym ? right->sym->start : right->ip; - - return (int64_t)(ip_r - ip_l); -} - -static size_t -sort__sym_print(FILE *fp, struct hist_entry *self) -{ - size_t ret = 0; - - if (verbose) - ret += fprintf(fp, "%#018llx ", (u64)self->ip); - - if (self->sym) { - ret += fprintf(fp, "[%c] %s", - self->dso == kernel_dso ? 'k' : '.', self->sym->name); - } else { - ret += fprintf(fp, "%#016llx", (u64)self->ip); - } - - return ret; -} - -static struct sort_entry sort_sym = { - .header = "Symbol", - .cmp = sort__sym_cmp, - .print = sort__sym_print, -}; - -static int sort__need_collapse = 0; - -struct sort_dimension { - const char *name; - struct sort_entry *entry; - int taken; -}; - -static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, -}; - -static LIST_HEAD(hist_entry__sort_list); - -static int sort_dimension__add(char *tok) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { - struct sort_dimension *sd = &sort_dimensions[i]; - - if (sd->taken) - continue; - - if (strncasecmp(tok, sd->name, strlen(tok))) - continue; - - if (sd->entry->collapse) - sort__need_collapse = 1; - - list_add_tail(&sd->entry->list, &hist_entry__sort_list); - sd->taken = 1; - - return 0; - } - - return -ESRCH; -} - static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1137,5 +934,11 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) setup_pager(); + if (field_sep && *field_sep == '.') { + fputs("'.' is the only non valid --field-separator argument\n", + stderr); + exit(129); + } + return __cmd_annotate(); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 19669c2..7b43504 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -27,15 +27,13 @@ #include "util/parse-events.h" #include "util/thread.h" +#include "util/sort.h" static char const *input_name = "perf.data"; -static char default_sort_order[] = "comm,dso,symbol"; -static char *sort_order = default_sort_order; static char *dso_list_str, *comm_list_str, *sym_list_str, *col_width_list_str; static struct strlist *dso_list, *comm_list, *sym_list; -static char *field_sep; static int force; static int input; @@ -53,10 +51,6 @@ static char *pretty_printing_style = default_pretty_printing_style; static unsigned long page_size; static unsigned long mmap_window = 32; -static char default_parent_pattern[] = "^sys_|^do_page_fault"; -static char *parent_pattern = default_parent_pattern; -static regex_t parent_regex; - static int exclude_other = 1; static char callchain_default_opt[] = "fractal,0.5"; @@ -80,304 +74,8 @@ struct callchain_param callchain_param = { static u64 sample_type; -static int repsep_fprintf(FILE *fp, const char *fmt, ...) -{ - int n; - va_list ap; - - va_start(ap, fmt); - if (!field_sep) - n = vfprintf(fp, fmt, ap); - else { - char *bf = NULL; - n = vasprintf(&bf, fmt, ap); - if (n > 0) { - char *sep = bf; - - while (1) { - sep = strchr(sep, *field_sep); - if (sep == NULL) - break; - *sep = '.'; - } - } - fputs(bf, fp); - free(bf); - } - va_end(ap); - return n; -} - -static unsigned int dsos__col_width, - comms__col_width, - threads__col_width; - -/* - * histogram, sorted on item, collects counts - */ - static struct rb_root hist; -struct hist_entry { - struct rb_node rb_node; - - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - struct symbol *parent; - u64 ip; - char level; - struct callchain_node callchain; - struct rb_root sorted_chain; - - u64 count; -}; - -/* - * configurable sorting bits - */ - -struct sort_entry { - struct list_head list; - - const char *header; - - int64_t (*cmp)(struct hist_entry *, struct hist_entry *); - int64_t (*collapse)(struct hist_entry *, struct hist_entry *); - size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); - unsigned int *width; - bool elide; -}; - -static int64_t cmp_null(void *l, void *r) -{ - if (!l && !r) - return 0; - else if (!l) - return -1; - else - return 1; -} - -/* --sort pid */ - -static int64_t -sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static size_t -sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) -{ - return repsep_fprintf(fp, "%*s:%5d", width - 6, - self->thread->comm ?: "", self->thread->pid); -} - -static struct sort_entry sort_thread = { - .header = "Command: Pid", - .cmp = sort__thread_cmp, - .print = sort__thread_print, - .width = &threads__col_width, -}; - -/* --sort comm */ - -static int64_t -sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static int64_t -sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) -{ - char *comm_l = left->thread->comm; - char *comm_r = right->thread->comm; - - if (!comm_l || !comm_r) - return cmp_null(comm_l, comm_r); - - return strcmp(comm_l, comm_r); -} - -static size_t -sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) -{ - return repsep_fprintf(fp, "%*s", width, self->thread->comm); -} - -static struct sort_entry sort_comm = { - .header = "Command", - .cmp = sort__comm_cmp, - .collapse = sort__comm_collapse, - .print = sort__comm_print, - .width = &comms__col_width, -}; - -/* --sort dso */ - -static int64_t -sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct dso *dso_l = left->dso; - struct dso *dso_r = right->dso; - - if (!dso_l || !dso_r) - return cmp_null(dso_l, dso_r); - - return strcmp(dso_l->name, dso_r->name); -} - -static size_t -sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) -{ - if (self->dso) - return repsep_fprintf(fp, "%-*s", width, self->dso->name); - - return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); -} - -static struct sort_entry sort_dso = { - .header = "Shared Object", - .cmp = sort__dso_cmp, - .print = sort__dso_print, - .width = &dsos__col_width, -}; - -/* --sort symbol */ - -static int64_t -sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) -{ - u64 ip_l, ip_r; - - if (left->sym == right->sym) - return 0; - - ip_l = left->sym ? left->sym->start : left->ip; - ip_r = right->sym ? right->sym->start : right->ip; - - return (int64_t)(ip_r - ip_l); -} - -static size_t -sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) -{ - size_t ret = 0; - - if (verbose) - ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, - dso__symtab_origin(self->dso)); - - ret += repsep_fprintf(fp, "[%c] ", self->level); - if (self->sym) { - ret += repsep_fprintf(fp, "%s", self->sym->name); - - if (self->sym->module) - ret += repsep_fprintf(fp, "\t[%s]", - self->sym->module->name); - } else { - ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); - } - - return ret; -} - -static struct sort_entry sort_sym = { - .header = "Symbol", - .cmp = sort__sym_cmp, - .print = sort__sym_print, -}; - -/* --sort parent */ - -static int64_t -sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct symbol *sym_l = left->parent; - struct symbol *sym_r = right->parent; - - if (!sym_l || !sym_r) - return cmp_null(sym_l, sym_r); - - return strcmp(sym_l->name, sym_r->name); -} - -static size_t -sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) -{ - return repsep_fprintf(fp, "%-*s", width, - self->parent ? self->parent->name : "[other]"); -} - -static unsigned int parent_symbol__col_width; - -static struct sort_entry sort_parent = { - .header = "Parent symbol", - .cmp = sort__parent_cmp, - .print = sort__parent_print, - .width = &parent_symbol__col_width, -}; - -static int sort__need_collapse = 0; -static int sort__has_parent = 0; - -struct sort_dimension { - const char *name; - struct sort_entry *entry; - int taken; -}; - -static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, - { .name = "parent", .entry = &sort_parent, }, -}; - -static LIST_HEAD(hist_entry__sort_list); - -static int sort_dimension__add(const char *tok) -{ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { - struct sort_dimension *sd = &sort_dimensions[i]; - - if (sd->taken) - continue; - - if (strncasecmp(tok, sd->name, strlen(tok))) - continue; - - if (sd->entry->collapse) - sort__need_collapse = 1; - - if (sd->entry == &sort_parent) { - int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); - if (ret) { - char err[BUFSIZ]; - - regerror(ret, &parent_regex, err, sizeof(err)); - fprintf(stderr, "Invalid regex: %s\n%s", - parent_pattern, err); - exit(-1); - } - sort__has_parent = 1; - } - - list_add_tail(&sd->entry->list, &hist_entry__sort_list); - sd->taken = 1; - - return 0; - } - - return -ESRCH; -} - static int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1606,7 +1304,8 @@ setup: return 0; } -static const char * const report_usage[] = { +//static const char * const report_usage[] = { +const char * const report_usage[] = { "perf report [] ", NULL }; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c new file mode 100644 index 0000000..50e75ab --- /dev/null +++ b/tools/perf/util/sort.c @@ -0,0 +1,268 @@ +#include "sort.h" + +regex_t parent_regex; +char default_parent_pattern[] = "^sys_|^do_page_fault"; +char *parent_pattern = default_parent_pattern; +char default_sort_order[] = "comm,dso,symbol"; +char *sort_order = default_sort_order; +int sort__need_collapse = 0; +int sort__has_parent = 0; + +unsigned int dsos__col_width; +unsigned int comms__col_width; +unsigned int threads__col_width; +static unsigned int parent_symbol__col_width; +char * field_sep; + +LIST_HEAD(hist_entry__sort_list); + +struct sort_entry sort_thread = { + .header = "Command: Pid", + .cmp = sort__thread_cmp, + .print = sort__thread_print, + .width = &threads__col_width, +}; + +struct sort_entry sort_comm = { + .header = "Command", + .cmp = sort__comm_cmp, + .collapse = sort__comm_collapse, + .print = sort__comm_print, + .width = &comms__col_width, +}; + +struct sort_entry sort_dso = { + .header = "Shared Object", + .cmp = sort__dso_cmp, + .print = sort__dso_print, + .width = &dsos__col_width, +}; + +struct sort_entry sort_sym = { + .header = "Symbol", + .cmp = sort__sym_cmp, + .print = sort__sym_print, +}; + +struct sort_entry sort_parent = { + .header = "Parent symbol", + .cmp = sort__parent_cmp, + .print = sort__parent_print, + .width = &parent_symbol__col_width, +}; + +struct sort_dimension { + const char *name; + struct sort_entry *entry; + int taken; +}; + +static struct sort_dimension sort_dimensions[] = { + { .name = "pid", .entry = &sort_thread, }, + { .name = "comm", .entry = &sort_comm, }, + { .name = "dso", .entry = &sort_dso, }, + { .name = "symbol", .entry = &sort_sym, }, + { .name = "parent", .entry = &sort_parent, }, +}; + +int64_t cmp_null(void *l, void *r) +{ + if (!l && !r) + return 0; + else if (!l) + return -1; + else + return 1; +} + +/* --sort pid */ + +int64_t +sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->thread->pid - left->thread->pid; +} + +int repsep_fprintf(FILE *fp, const char *fmt, ...) +{ + int n; + va_list ap; + + va_start(ap, fmt); + if (!field_sep) + n = vfprintf(fp, fmt, ap); + else { + char *bf = NULL; + n = vasprintf(&bf, fmt, ap); + if (n > 0) { + char *sep = bf; + + while (1) { + sep = strchr(sep, *field_sep); + if (sep == NULL) + break; + *sep = '.'; + } + } + fputs(bf, fp); + free(bf); + } + va_end(ap); + return n; +} + +size_t +sort__thread_print(FILE *fp, struct hist_entry *self, unsigned int width) +{ + return repsep_fprintf(fp, "%*s:%5d", width - 6, + self->thread->comm ?: "", self->thread->pid); +} + +size_t +sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) +{ + return repsep_fprintf(fp, "%*s", width, self->thread->comm); +} + +/* --sort dso */ + +int64_t +sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct dso *dso_l = left->dso; + struct dso *dso_r = right->dso; + + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); + + return strcmp(dso_l->name, dso_r->name); +} + +size_t +sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) +{ + if (self->dso) + return repsep_fprintf(fp, "%-*s", width, self->dso->name); + + return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); +} + +/* --sort symbol */ + +int64_t +sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) +{ + u64 ip_l, ip_r; + + if (left->sym == right->sym) + return 0; + + ip_l = left->sym ? left->sym->start : left->ip; + ip_r = right->sym ? right->sym->start : right->ip; + + return (int64_t)(ip_r - ip_l); +} + + +size_t +sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) +{ + size_t ret = 0; + + if (verbose) + ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, + dso__symtab_origin(self->dso)); + + ret += repsep_fprintf(fp, "[%c] ", self->level); + if (self->sym) { + ret += repsep_fprintf(fp, "%s", self->sym->name); + + if (self->sym->module) + ret += repsep_fprintf(fp, "\t[%s]", + self->sym->module->name); + } else { + ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); + } + + return ret; +} + +/* --sort comm */ + +int64_t +sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->thread->pid - left->thread->pid; +} + +int64_t +sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) +{ + char *comm_l = left->thread->comm; + char *comm_r = right->thread->comm; + + if (!comm_l || !comm_r) + return cmp_null(comm_l, comm_r); + + return strcmp(comm_l, comm_r); +} + +/* --sort parent */ + +int64_t +sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct symbol *sym_l = left->parent; + struct symbol *sym_r = right->parent; + + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + return strcmp(sym_l->name, sym_r->name); +} + +size_t +sort__parent_print(FILE *fp, struct hist_entry *self, unsigned int width) +{ + return repsep_fprintf(fp, "%-*s", width, + self->parent ? self->parent->name : "[other]"); +} + +int sort_dimension__add(const char *tok) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { + struct sort_dimension *sd = &sort_dimensions[i]; + + if (sd->taken) + continue; + + if (strncasecmp(tok, sd->name, strlen(tok))) + continue; + + if (sd->entry->collapse) + sort__need_collapse = 1; + + if (sd->entry == &sort_parent) { + int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); + if (ret) { + char err[BUFSIZ]; + + regerror(ret, &parent_regex, err, sizeof(err)); + fprintf(stderr, "Invalid regex: %s\n%s", + parent_pattern, err); + exit(-1); + } + sort__has_parent = 1; + } + + list_add_tail(&sd->entry->list, &hist_entry__sort_list); + sd->taken = 1; + + return 0; + } + + return -ESRCH; +} + diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h new file mode 100644 index 0000000..4684fd6 --- /dev/null +++ b/tools/perf/util/sort.h @@ -0,0 +1,93 @@ +#ifndef __PERF_SORT_H +#define __PERF_SORT_H +#include "../builtin.h" + +#include "util.h" + +#include "color.h" +#include +#include "cache.h" +#include +#include "symbol.h" +#include "string.h" +#include "callchain.h" +#include "strlist.h" +#include "values.h" + +#include "../perf.h" +#include "debug.h" +#include "header.h" + +#include "parse-options.h" +#include "parse-events.h" + +#include "thread.h" +#include "sort.h" + +extern regex_t parent_regex; +extern char *sort_order; +extern char default_parent_pattern[]; +extern char *parent_pattern; +extern char default_sort_order[]; +extern int sort__need_collapse; +extern int sort__has_parent; +extern char *field_sep; +extern struct sort_entry sort_comm; +extern struct sort_entry sort_dso; +extern struct sort_entry sort_sym; +extern struct sort_entry sort_parent; +extern unsigned int dsos__col_width; +extern unsigned int comms__col_width; +extern unsigned int threads__col_width; + +struct hist_entry { + struct rb_node rb_node; + + struct thread *thread; + struct map *map; + struct dso *dso; + struct symbol *sym; + struct symbol *parent; + u64 ip; + char level; + struct callchain_node callchain; + struct rb_root sorted_chain; + + u64 count; +}; + +/* + * configurable sorting bits + */ + +struct sort_entry { + struct list_head list; + + const char *header; + + int64_t (*cmp)(struct hist_entry *, struct hist_entry *); + int64_t (*collapse)(struct hist_entry *, struct hist_entry *); + size_t (*print)(FILE *fp, struct hist_entry *, unsigned int width); + unsigned int *width; + bool elide; +}; + +extern struct sort_entry sort_thread; +extern struct list_head hist_entry__sort_list; + +extern int repsep_fprintf(FILE *fp, const char *fmt, ...); +extern size_t sort__thread_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__comm_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__dso_print(FILE *, struct hist_entry *, unsigned int); +extern size_t sort__sym_print(FILE *, struct hist_entry *, unsigned int __used); +extern int64_t cmp_null(void *, void *); +extern int64_t sort__thread_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__comm_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__comm_collapse(struct hist_entry *, struct hist_entry *); +extern int64_t sort__dso_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__sym_cmp(struct hist_entry *, struct hist_entry *); +extern int64_t sort__parent_cmp(struct hist_entry *, struct hist_entry *); +extern size_t sort__parent_print(FILE *, struct hist_entry *, unsigned int); +extern int sort_dimension__add(const char *); + +#endif /* __PERF_SORT_H */ -- cgit v0.10.2 From 1889d20922d14a97b2099fa4d47587217c0ba48b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Sep 2009 21:10:44 +0200 Subject: tracing/filters: Provide basic regex support This patch provides basic support for regular expressions in filters. It supports the following types of regexp: - *match_beginning - *match_middle* - match_end* - !don't match Example: cd /debug/tracing/events/bkl/lock_kernel echo 'file == "*reiserfs*"' > filter echo 1 > enable gedit-4941 [000] 457.735437: lock_kernel: depth: 0, fs/reiserfs/namei.c:334 reiserfs_lookup() sync_supers-227 [001] 461.379985: lock_kernel: depth: 0, fs/reiserfs/super.c:69 reiserfs_sync_fs() sync_supers-227 [000] 461.383096: lock_kernel: depth: 0, fs/reiserfs/journal.c:1069 flush_commit_list() reiserfs/1-1369 [001] 461.479885: lock_kernel: depth: 0, fs/reiserfs/journal.c:3509 flush_async_commits() Every string is now handled as a regexp in the filter framework, which helps to factorize the code for handling both simple strings and regexp comparisons. (The regexp parsing code has been wildly cherry picked from ftrace.c written by Steve.) v2: Simplify the whole and drop the filter_regex file Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi Cc: Li Zefan diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 86bcff9..8d0db60 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -702,20 +702,29 @@ struct event_subsystem { }; struct filter_pred; +struct regex; typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, int val1, int val2); +typedef int (*regex_match_func)(char *str, struct regex *r, int len); + +struct regex { + char pattern[MAX_FILTER_STR_VAL]; + int len; + int field_len; + regex_match_func match; +}; + struct filter_pred { - filter_pred_fn_t fn; - u64 val; - char str_val[MAX_FILTER_STR_VAL]; - int str_len; - char *field_name; - int offset; - int not; - int op; - int pop_n; + filter_pred_fn_t fn; + u64 val; + struct regex regex; + char *field_name; + int offset; + int not; + int op; + int pop_n; }; extern void print_event_filter(struct ftrace_event_call *call, diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 189663d..d3c94c1 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -195,9 +195,9 @@ static int filter_pred_string(struct filter_pred *pred, void *event, char *addr = (char *)(event + pred->offset); int cmp, match; - cmp = strncmp(addr, pred->str_val, pred->str_len); + cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -209,9 +209,9 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event, char **addr = (char **)(event + pred->offset); int cmp, match; - cmp = strncmp(*addr, pred->str_val, pred->str_len); + cmp = pred->regex.match(*addr, &pred->regex, pred->regex.field_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -235,9 +235,9 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event, char *addr = (char *)(event + str_loc); int cmp, match; - cmp = strncmp(addr, pred->str_val, str_len); + cmp = pred->regex.match(addr, &pred->regex, str_len); - match = (!cmp) ^ pred->not; + match = cmp ^ pred->not; return match; } @@ -248,6 +248,126 @@ static int filter_pred_none(struct filter_pred *pred, void *event, return 0; } +/* Basic regex callbacks */ +static int regex_match_full(char *str, struct regex *r, int len) +{ + if (strncmp(str, r->pattern, len) == 0) + return 1; + return 0; +} + +static int regex_match_front(char *str, struct regex *r, int len) +{ + if (strncmp(str, r->pattern, len) == 0) + return 1; + return 0; +} + +static int regex_match_middle(char *str, struct regex *r, int len) +{ + if (strstr(str, r->pattern)) + return 1; + return 0; +} + +static int regex_match_end(char *str, struct regex *r, int len) +{ + char *ptr = strstr(str, r->pattern); + + if (ptr && (ptr[r->len] == 0)) + return 1; + return 0; +} + +enum regex_type { + MATCH_FULL, + MATCH_FRONT_ONLY, + MATCH_MIDDLE_ONLY, + MATCH_END_ONLY, +}; + +/* + * Pass in a buffer containing a regex and this function will + * set search to point to the search part of the buffer and + * return the type of search it is (see enum above). + * This does modify buff. + * + * Returns enum type. + * search returns the pointer to use for comparison. + * not returns 1 if buff started with a '!' + * 0 otherwise. + */ +static enum regex_type +filter_parse_regex(char *buff, int len, char **search, int *not) +{ + int type = MATCH_FULL; + int i; + + if (buff[0] == '!') { + *not = 1; + buff++; + len--; + } else + *not = 0; + + *search = buff; + + for (i = 0; i < len; i++) { + if (buff[i] == '*') { + if (!i) { + *search = buff + 1; + type = MATCH_END_ONLY; + } else { + if (type == MATCH_END_ONLY) + type = MATCH_MIDDLE_ONLY; + else + type = MATCH_FRONT_ONLY; + buff[i] = 0; + break; + } + } + } + + return type; +} + +static int filter_build_regex(struct filter_pred *pred) +{ + struct regex *r = &pred->regex; + char *search, *dup; + enum regex_type type; + int not; + + type = filter_parse_regex(r->pattern, r->len, &search, ¬); + dup = kstrdup(search, GFP_KERNEL); + if (!dup) + return -ENOMEM; + + strcpy(r->pattern, dup); + kfree(dup); + + r->len = strlen(r->pattern); + + switch (type) { + case MATCH_FULL: + r->match = regex_match_full; + break; + case MATCH_FRONT_ONLY: + r->match = regex_match_front; + break; + case MATCH_MIDDLE_ONLY: + r->match = regex_match_middle; + break; + case MATCH_END_ONLY: + r->match = regex_match_end; + break; + } + + pred->not ^= not; + + return 0; +} + /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct ftrace_event_call *call, void *rec) { @@ -394,7 +514,7 @@ static void filter_clear_pred(struct filter_pred *pred) { kfree(pred->field_name); pred->field_name = NULL; - pred->str_len = 0; + pred->regex.len = 0; } static int filter_set_pred(struct filter_pred *dest, @@ -658,21 +778,24 @@ static int filter_add_pred(struct filter_parse_state *ps, } if (is_string_field(field)) { - pred->str_len = field->size; + ret = filter_build_regex(pred); + if (ret) + return ret; - if (field->filter_type == FILTER_STATIC_STRING) + if (field->filter_type == FILTER_STATIC_STRING) { fn = filter_pred_string; - else if (field->filter_type == FILTER_DYN_STRING) - fn = filter_pred_strloc; + pred->regex.field_len = field->size; + } else if (field->filter_type == FILTER_DYN_STRING) + fn = filter_pred_strloc; else { fn = filter_pred_pchar; - pred->str_len = strlen(pred->str_val); + pred->regex.field_len = strlen(pred->regex.pattern); } } else { if (field->is_signed) - ret = strict_strtoll(pred->str_val, 0, &val); + ret = strict_strtoll(pred->regex.pattern, 0, &val); else - ret = strict_strtoull(pred->str_val, 0, &val); + ret = strict_strtoull(pred->regex.pattern, 0, &val); if (ret) { parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0); return -EINVAL; @@ -1042,8 +1165,8 @@ static struct filter_pred *create_pred(int op, char *operand1, char *operand2) return NULL; } - strcpy(pred->str_val, operand2); - pred->str_len = strlen(operand2); + strcpy(pred->regex.pattern, operand2); + pred->regex.len = strlen(pred->regex.pattern); pred->op = op; -- cgit v0.10.2 From 3f6fe06dbf67b46d36fedec502300e04dffeb67a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Sep 2009 21:31:51 +0200 Subject: tracing/filters: Unify the regex parsing helpers The filter code has stolen the regex parsing function from ftrace to get the regex support. We have duplicated this code, so factorize it in the filter area and make it generally available, as the filter code is the most suited to host this feature. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi Cc: Li Zefan diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cc615f8..ddf23a2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1655,60 +1655,6 @@ ftrace_regex_lseek(struct file *file, loff_t offset, int origin) return ret; } -enum { - MATCH_FULL, - MATCH_FRONT_ONLY, - MATCH_MIDDLE_ONLY, - MATCH_END_ONLY, -}; - -/* - * (static function - no need for kernel doc) - * - * Pass in a buffer containing a glob and this function will - * set search to point to the search part of the buffer and - * return the type of search it is (see enum above). - * This does modify buff. - * - * Returns enum type. - * search returns the pointer to use for comparison. - * not returns 1 if buff started with a '!' - * 0 otherwise. - */ -static int -ftrace_setup_glob(char *buff, int len, char **search, int *not) -{ - int type = MATCH_FULL; - int i; - - if (buff[0] == '!') { - *not = 1; - buff++; - len--; - } else - *not = 0; - - *search = buff; - - for (i = 0; i < len; i++) { - if (buff[i] == '*') { - if (!i) { - *search = buff + 1; - type = MATCH_END_ONLY; - } else { - if (type == MATCH_END_ONLY) - type = MATCH_MIDDLE_ONLY; - else - type = MATCH_FRONT_ONLY; - buff[i] = 0; - break; - } - } - } - - return type; -} - static int ftrace_match(char *str, char *regex, int len, int type) { int matched = 0; @@ -1757,7 +1703,7 @@ static void ftrace_match_records(char *buff, int len, int enable) int not; flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; - type = ftrace_setup_glob(buff, len, &search, ¬); + type = filter_parse_regex(buff, len, &search, ¬); search_len = strlen(search); @@ -1825,7 +1771,7 @@ static void ftrace_match_module_records(char *buff, char *mod, int enable) } if (strlen(buff)) { - type = ftrace_setup_glob(buff, strlen(buff), &search, ¬); + type = filter_parse_regex(buff, strlen(buff), &search, ¬); search_len = strlen(search); } @@ -1990,7 +1936,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, int count = 0; char *search; - type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); + type = filter_parse_regex(glob, strlen(glob), &search, ¬); len = strlen(search); /* we do not support '!' for function probes */ @@ -2067,7 +2013,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, else if (glob) { int not; - type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); + type = filter_parse_regex(glob, strlen(glob), &search, ¬); len = strlen(search); /* we do not support '!' for function probes */ @@ -2520,7 +2466,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer) return -ENODEV; /* decode regex */ - type = ftrace_setup_glob(buffer, strlen(buffer), &search, ¬); + type = filter_parse_regex(buffer, strlen(buffer), &search, ¬); if (not) return -EINVAL; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8d0db60..db6b83e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -709,6 +709,13 @@ typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, typedef int (*regex_match_func)(char *str, struct regex *r, int len); +enum regex_type { + MATCH_FULL, + MATCH_FRONT_ONLY, + MATCH_MIDDLE_ONLY, + MATCH_END_ONLY, +}; + struct regex { char pattern[MAX_FILTER_STR_VAL]; int len; @@ -727,6 +734,8 @@ struct filter_pred { int pop_n; }; +extern enum regex_type +filter_parse_regex(char *buff, int len, char **search, int *not); extern void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s); extern int apply_event_filter(struct ftrace_event_call *call, diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index d3c94c1..8c194de 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -279,15 +279,14 @@ static int regex_match_end(char *str, struct regex *r, int len) return 0; } -enum regex_type { - MATCH_FULL, - MATCH_FRONT_ONLY, - MATCH_MIDDLE_ONLY, - MATCH_END_ONLY, -}; - -/* - * Pass in a buffer containing a regex and this function will +/** + * filter_parse_regex - parse a basic regex + * @buff: the raw regex + * @len: length of the regex + * @search: will point to the beginning of the string to compare + * @not: tell whether the match will have to be inverted + * + * This passes in a buffer containing a regex and this function will * set search to point to the search part of the buffer and * return the type of search it is (see enum above). * This does modify buff. @@ -297,8 +296,7 @@ enum regex_type { * not returns 1 if buff started with a '!' * 0 otherwise. */ -static enum regex_type -filter_parse_regex(char *buff, int len, char **search, int *not) +enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) { int type = MATCH_FULL; int i; -- cgit v0.10.2 From 7f366784f5c2b8fc0658b5b374f4c63ee42c789f Mon Sep 17 00:00:00 2001 From: Rajiv Andrade Date: Thu, 24 Sep 2009 16:27:46 -0300 Subject: TPM: increase default TPM buffer The TPM Working Group requested this communication buffer increase given that a particular TPM vendor can support a TPM_SHA1Start command input bigger than the current size. Signed-off-by: Rajiv Andrade Signed-off-by: James Morris diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index b0603b2..f4c68ab 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -31,7 +31,7 @@ enum tpm_const { TPM_MINOR = 224, /* officially assigned */ - TPM_BUFSIZE = 2048, + TPM_BUFSIZE = 4096, TPM_NUM_DEVICES = 256, }; -- cgit v0.10.2 From 9f0cf4adb6aa0bfccf675c938124e68f7f06349d Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 14:33:01 +0200 Subject: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user() gcc (4.x) supports the __builtin_object_size() builtin, which reports the size of an object that a pointer point to, when known at compile time. If the buffer size is not known at compile time, a constant -1 is returned. This patch uses this feature to add a sanity check to copy_from_user(); if the target buffer is known to be smaller than the copy size, the copy is aborted and a WARNing is emitted in memory debug mode. These extra checks compile away when the object size is not known, or if both the buffer size and the copy length are constants. Signed-off-by: Arjan van de Ven LKML-Reference: <20090926143301.2c396b94@infradead.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 632fb44..582d6ae 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -187,9 +187,26 @@ __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n); -unsigned long __must_check copy_from_user(void *to, +unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + long __must_check strncpy_from_user(char *dst, const char __user *src, long count); long __must_check __strncpy_from_user(char *dst, diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index db24b21..ce6fec7 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -21,10 +21,27 @@ copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long -copy_from_user(void *to, const void __user *from, unsigned len); +_copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len); +static inline unsigned long __must_check copy_from_user(void *to, + const void __user *from, + unsigned long n) +{ + int sz = __compiletime_object_size(to); + int ret = -EFAULT; + + if (likely(sz == -1 || sz >= n)) + ret = _copy_from_user(to, from, n); +#ifdef CONFIG_DEBUG_VM + else + WARN(1, "Buffer overflow detected!\n"); +#endif + return ret; +} + + static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3b..a0cdd8c 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -30,7 +30,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(copy_to_user); EXPORT_SYMBOL(__copy_from_user_inatomic); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 6ba0f7b..4be3c41 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -78,7 +78,7 @@ ENTRY(copy_to_user) ENDPROC(copy_to_user) /* Standard copy_from_user with segment limit checking */ -ENTRY(copy_from_user) +ENTRY(_copy_from_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rsi,%rcx @@ -88,7 +88,7 @@ ENTRY(copy_from_user) jae bad_from_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_from_user) +ENDPROC(_copy_from_user) ENTRY(copy_user_generic) CFI_STARTPROC diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 1f118d4..8498684 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -874,7 +874,7 @@ EXPORT_SYMBOL(copy_to_user); * data to the requested size using zero bytes. */ unsigned long -copy_from_user(void *to, const void __user *from, unsigned long n) +_copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, from, n); @@ -882,4 +882,4 @@ copy_from_user(void *to, const void __user *from, unsigned long n) memset(to, 0, n); return n; } -EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(_copy_from_user); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa59..a3aef5d 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -37,3 +37,5 @@ #define __cold __attribute__((__cold__)) #endif + +#define __compiletime_object_size(obj) __builtin_object_size(obj, 0) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb513..8e54108 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -266,6 +266,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* Compile time object size, -1 for unknown */ +#ifndef __compiletime_object_size +# define __compiletime_object_size(obj) -1 +#endif /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v0.10.2 From 925936ebf35a95c290e010b784c962164e6728f3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 28 Sep 2009 17:12:49 +0200 Subject: tracing: Pushdown the bkl tracepoints calls Currently we are calling the bkl tracepoint callbacks just before the bkl lock/unlock operations, ie the tracepoint call is not inside a lock_kernel() function but inside a lock_kernel() macro. Hence the bkl trace event header must be included from smp_lock.h. This raises some nasty circular header dependencies: linux/smp_lock.h -> trace/events/bkl.h -> trace/define_trace.h -> trace/ftrace.h -> linux/ftrace_event.h -> linux/hardirq.h -> linux/smp_lock.h This results in incomplete event declarations, spurious event definitions and other kind of funny behaviours. This is hardly fixable without ugly workarounds. So instead, we push the file name, line number and function name as lock_kernel() parameters, so that we only deal with the trace event header from lib/kernel_lock.c This adds two parameters to lock_kernel() and unlock_kernel() but it should be fine wrt to performances because this pair dos not seem to be called in fast paths. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Ingo Molnar Cc: Li Zefan diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index d48cc77..2ea1dd1 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -3,7 +3,6 @@ #ifdef CONFIG_LOCK_KERNEL #include -#include #define kernel_locked() (current->lock_depth >= 0) @@ -25,18 +24,21 @@ static inline int reacquire_kernel_lock(struct task_struct *task) return 0; } -extern void __lockfunc _lock_kernel(void) __acquires(kernel_lock); -extern void __lockfunc _unlock_kernel(void) __releases(kernel_lock); +extern void __lockfunc +_lock_kernel(const char *func, const char *file, int line) +__acquires(kernel_lock); -#define lock_kernel() { \ - trace_lock_kernel(__func__, __FILE__, __LINE__); \ - _lock_kernel(); \ -} +extern void __lockfunc +_unlock_kernel(const char *func, const char *file, int line) +__releases(kernel_lock); -#define unlock_kernel() { \ - trace_unlock_kernel(__func__, __FILE__, __LINE__); \ - _unlock_kernel(); \ -} +#define lock_kernel() do { \ + _lock_kernel(__func__, __FILE__, __LINE__); \ +} while (0) + +#define unlock_kernel() do { \ + _unlock_kernel(__func__, __FILE__, __LINE__); \ +} while (0) /* * Various legacy drivers don't really need the BKL in a specific @@ -52,8 +54,8 @@ static inline void cycle_kernel_lock(void) #else -#define lock_kernel() trace_lock_kernel(__func__, __FILE__, __LINE__); -#define unlock_kernel() trace_unlock_kernel(__func__, __FILE__, __LINE__); +#define lock_kernel() +#define unlock_kernel() #define release_kernel_lock(task) do { } while(0) #define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index 5c10b2e..4ebfa5a 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -8,9 +8,11 @@ #include #include #include -#define CREATE_TRACE_POINTS #include +#define CREATE_TRACE_POINTS +#include + /* * The 'big kernel lock' * @@ -114,19 +116,24 @@ static inline void __unlock_kernel(void) * This cannot happen asynchronously, so we only need to * worry about other CPU's. */ -void __lockfunc _lock_kernel(void) +void __lockfunc _lock_kernel(const char *func, const char *file, int line) { - int depth = current->lock_depth+1; + int depth = current->lock_depth + 1; + + trace_lock_kernel(func, file, line); + if (likely(!depth)) __lock_kernel(); current->lock_depth = depth; } -void __lockfunc _unlock_kernel(void) +void __lockfunc _unlock_kernel(const char *func, const char *file, int line) { BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); + + trace_unlock_kernel(func, file, line); } EXPORT_SYMBOL(_lock_kernel); -- cgit v0.10.2 From ff60fab71bb3b4fdbf8caf57ff3739ffd0887396 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 28 Sep 2009 14:21:22 +0200 Subject: x86: Use __builtin_memset and __builtin_memcpy for memset/memcpy GCC provides reasonable memset/memcpy functions itself, with __builtin_memset and __builtin_memcpy. For the "unknown" cases, it'll fall back to our current existing functions, but for fixed size versions it'll inline something smart. Quite often that will be the same as we have now, but sometimes it can do something smarter (for example, if the code then sets the first member of a struct, it can do a shorter memset). In addition, and this is more important, gcc knows which registers and such are not clobbered (while for our asm version it pretty much acts like a compiler barrier), so for various cases it can avoid reloading values. The effect on codesize is shown below on my typical laptop .config: text data bss dec hex filename 5605675 2041100 6525148 14171923 d83f13 vmlinux.before 5595849 2041668 6525148 14162665 d81ae9 vmlinux.after Due to some not-so-good behavior in the gcc 3.x series, this change is only done for GCC 4.x and above. Signed-off-by: Arjan van de Ven LKML-Reference: <20090928142122.6fc57e9c@infradead.org> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index ae907e6..3d3e835 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -177,10 +177,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) */ #ifndef CONFIG_KMEMCHECK + +#if (__GNUC__ >= 4) +#define memcpy(t, f, n) __builtin_memcpy(t, f, n) +#else #define memcpy(t, f, n) \ (__builtin_constant_p((n)) \ ? __constant_memcpy((t), (f), (n)) \ : __memcpy((t), (f), (n))) +#endif #else /* * kmemcheck becomes very happy if we use the REP instructions unconditionally, @@ -316,11 +321,15 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +#if (__GNUC__ >= 4) +#define memset(s, c, count) __builtin_memset(s, c, count) +#else #define memset(s, c, count) \ (__builtin_constant_p(c) \ ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \ (count)) \ : __memset((s), (c), (count))) +#endif /* * find the first occurrence of byte 'c', or 1 past the area if none -- cgit v0.10.2 From af8ff04917169805b151280155bf772d3ca9bec0 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Sun, 20 Sep 2009 21:23:01 -0400 Subject: SELinux: reset the security_ops before flushing the avc cache This patch resets the security_ops to the secondary_ops before it flushes the avc. It's still possible that a task on another processor could have already passed the security_ops dereference and be executing an selinux hook function which would add a new avc entry. That entry would still not be freed. This should however help to reduce the number of needless avcs the kernel has when selinux is disabled at run time. There is no wasted memory if selinux is disabled on the command line or not compiled. Signed-off-by: Eric Paris Signed-off-by: James Morris diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index bb230d5..a985d0b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5830,12 +5830,12 @@ int selinux_disable(void) selinux_disabled = 1; selinux_enabled = 0; - /* Try to destroy the avc node cache */ - avc_disable(); - /* Reset security_ops to the secondary module, dummy or capability. */ security_ops = secondary_ops; + /* Try to destroy the avc node cache */ + avc_disable(); + /* Unregister netfilter hooks. */ selinux_nf_ip_exit(); -- cgit v0.10.2 From 3d1d07ecd2009f65cb2091563fa21f9600c36774 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Mon, 28 Sep 2009 15:32:55 +0200 Subject: perf tools: Put common histogram functions in their own file Move histogram related functions into their own files (hist.c and hist.h) and make use of them in builtin-annotate.c and builtin-report.c. Signed-off-by: John Kacur Acked-by: Frederic Weisbecker Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0a9e5ae..3a99a9f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -340,6 +340,7 @@ LIB_H += util/module.h LIB_H += util/color.h LIB_H += util/values.h LIB_H += util/sort.h +LIB_H += util/hist.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -376,6 +377,7 @@ LIB_OBJS += util/trace-event-read.o LIB_OBJS += util/trace-event-info.o LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o +LIB_OBJS += util/hist.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 059c565..df516dc 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -23,6 +23,7 @@ #include "util/parse-events.h" #include "util/thread.h" #include "util/sort.h" +#include "util/hist.h" static char const *input_name = "perf.data"; @@ -47,45 +48,6 @@ struct sym_ext { char *path; }; -/* - * histogram, sorted on item, collects counts - */ - -static struct rb_root hist; - -static int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - cmp = se->cmp(left, right); - if (cmp) - break; - } - - return cmp; -} - -static int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - int64_t (*f)(struct hist_entry *, struct hist_entry *); - - f = se->collapse ?: se->cmp; - - cmp = f(left, right); - if (cmp) - break; - } - - return cmp; -} /* * collect histogram counts @@ -163,116 +125,6 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, return 0; } -static void hist_entry__free(struct hist_entry *he) -{ - free(he); -} - -/* - * collapse the histogram - */ - -static struct rb_root collapse_hists; - -static void collapse__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &collapse_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - int64_t cmp; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__collapse(iter, he); - - if (!cmp) { - iter->count += he->count; - hist_entry__free(he); - return; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &collapse_hists); -} - -static void collapse__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - - if (!sort__need_collapse) - return; - - next = rb_first(&hist); - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, &hist); - collapse__insert_entry(n); - } -} - -/* - * reverse the map, sort on count. - */ - -static struct rb_root output_hists; - -static void output__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &output_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - if (he->count > iter->count) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &output_hists); -} - -static void output__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - struct rb_root *tree = &hist; - - if (sort__need_collapse) - tree = &collapse_hists; - - next = rb_first(tree); - - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, tree); - output__insert_entry(n); - } -} - -static unsigned long total = 0, - total_mmap = 0, - total_comm = 0, - total_fork = 0, - total_unknown = 0; - static int process_sample_event(event_t *event, unsigned long offset, unsigned long head) { @@ -861,7 +713,7 @@ more: dsos__fprintf(stdout); collapse__resort(); - output__resort(); + output__resort(total); find_annotations(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7b43504..c1a54fc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -28,6 +28,7 @@ #include "util/thread.h" #include "util/sort.h" +#include "util/hist.h" static char const *input_name = "perf.data"; @@ -55,8 +56,6 @@ static int exclude_other = 1; static char callchain_default_opt[] = "fractal,0.5"; -static int callchain; - static char __cwd[PATH_MAX]; static char *cwd = __cwd; static int cwdlen; @@ -66,50 +65,8 @@ static struct thread *last_match; static struct perf_header *header; -static -struct callchain_param callchain_param = { - .mode = CHAIN_GRAPH_REL, - .min_percent = 0.5 -}; - static u64 sample_type; -static struct rb_root hist; - -static int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - cmp = se->cmp(left, right); - if (cmp) - break; - } - - return cmp; -} - -static int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - int64_t (*f)(struct hist_entry *, struct hist_entry *); - - f = se->collapse ?: se->cmp; - - cmp = f(left, right); - if (cmp) - break; - } - - return cmp; -} - static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask) { int i; @@ -308,7 +265,6 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, return ret; } - static size_t hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) { @@ -573,117 +529,6 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, return 0; } -static void hist_entry__free(struct hist_entry *he) -{ - free(he); -} - -/* - * collapse the histogram - */ - -static struct rb_root collapse_hists; - -static void collapse__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &collapse_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - int64_t cmp; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__collapse(iter, he); - - if (!cmp) { - iter->count += he->count; - hist_entry__free(he); - return; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &collapse_hists); -} - -static void collapse__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - - if (!sort__need_collapse) - return; - - next = rb_first(&hist); - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, &hist); - collapse__insert_entry(n); - } -} - -/* - * reverse the map, sort on count. - */ - -static struct rb_root output_hists; - -static void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits) -{ - struct rb_node **p = &output_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - if (callchain) - callchain_param.sort(&he->sorted_chain, &he->callchain, - min_callchain_hits, &callchain_param); - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - if (he->count > iter->count) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &output_hists); -} - -static void output__resort(u64 total_samples) -{ - struct rb_node *next; - struct hist_entry *n; - struct rb_root *tree = &hist; - u64 min_callchain_hits; - - min_callchain_hits = total_samples * (callchain_param.min_percent / 100); - - if (sort__need_collapse) - tree = &collapse_hists; - - next = rb_first(tree); - - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, tree); - output__insert_entry(n, min_callchain_hits); - } -} - static size_t output__fprintf(FILE *fp, u64 total_samples) { struct hist_entry *pos; @@ -778,13 +623,6 @@ print_entries: return ret; } -static unsigned long total = 0, - total_mmap = 0, - total_comm = 0, - total_fork = 0, - total_unknown = 0, - total_lost = 0; - static int validate_chain(struct ip_callchain *chain, event_t *event) { unsigned int chain_size; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c new file mode 100644 index 0000000..82808dc --- /dev/null +++ b/tools/perf/util/hist.c @@ -0,0 +1,164 @@ +#include "hist.h" + +struct rb_root hist; +struct rb_root collapse_hists; +struct rb_root output_hists; +int callchain; + +struct callchain_param callchain_param = { + .mode = CHAIN_GRAPH_REL, + .min_percent = 0.5 +}; + +unsigned long total; +unsigned long total_mmap; +unsigned long total_comm; +unsigned long total_fork; +unsigned long total_unknown; +unsigned long total_lost; + +/* + * histogram, sorted on item, collects counts + */ + +int64_t +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct sort_entry *se; + int64_t cmp = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + cmp = se->cmp(left, right); + if (cmp) + break; + } + + return cmp; +} + +int64_t +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +{ + struct sort_entry *se; + int64_t cmp = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + int64_t (*f)(struct hist_entry *, struct hist_entry *); + + f = se->collapse ?: se->cmp; + + cmp = f(left, right); + if (cmp) + break; + } + + return cmp; +} + +void hist_entry__free(struct hist_entry *he) +{ + free(he); +} + +/* + * collapse the histogram + */ + +void collapse__insert_entry(struct hist_entry *he) +{ + struct rb_node **p = &collapse_hists.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + int64_t cmp; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + cmp = hist_entry__collapse(iter, he); + + if (!cmp) { + iter->count += he->count; + hist_entry__free(he); + return; + } + + if (cmp < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &collapse_hists); +} + +void collapse__resort(void) +{ + struct rb_node *next; + struct hist_entry *n; + + if (!sort__need_collapse) + return; + + next = rb_first(&hist); + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + next = rb_next(&n->rb_node); + + rb_erase(&n->rb_node, &hist); + collapse__insert_entry(n); + } +} + +/* + * reverse the map, sort on count. + */ + +void output__insert_entry(struct hist_entry *he, u64 min_callchain_hits) +{ + struct rb_node **p = &output_hists.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + + if (callchain) + callchain_param.sort(&he->sorted_chain, &he->callchain, + min_callchain_hits, &callchain_param); + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + if (he->count > iter->count) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &output_hists); +} + +void output__resort(u64 total_samples) +{ + struct rb_node *next; + struct hist_entry *n; + struct rb_root *tree = &hist; + u64 min_callchain_hits; + + min_callchain_hits = + total_samples * (callchain_param.min_percent / 100); + + if (sort__need_collapse) + tree = &collapse_hists; + + next = rb_first(tree); + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + next = rb_next(&n->rb_node); + + rb_erase(&n->rb_node, tree); + output__insert_entry(n, min_callchain_hits); + } +} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h new file mode 100644 index 0000000..9a8daa1 --- /dev/null +++ b/tools/perf/util/hist.h @@ -0,0 +1,47 @@ +#ifndef __PERF_HIST_H +#define __PERF_HIST_H +#include "../builtin.h" + +#include "util.h" + +#include "color.h" +#include +#include "cache.h" +#include +#include "symbol.h" +#include "string.h" +#include "callchain.h" +#include "strlist.h" +#include "values.h" + +#include "../perf.h" +#include "debug.h" +#include "header.h" + +#include "parse-options.h" +#include "parse-events.h" + +#include "thread.h" +#include "sort.h" + +extern struct rb_root hist; +extern struct rb_root collapse_hists; +extern struct rb_root output_hists; +extern int callchain; +extern struct callchain_param callchain_param; +extern unsigned long total; +extern unsigned long total_mmap; +extern unsigned long total_comm; +extern unsigned long total_fork; +extern unsigned long total_unknown; +extern unsigned long total_lost; + +extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); +extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); +extern void hist_entry__free(struct hist_entry *); +extern void collapse__insert_entry(struct hist_entry *); +extern void collapse__resort(void); +extern void output__insert_entry(struct hist_entry *, u64); +extern void output__resort(u64); + +#endif /* __PERF_HIST_H */ -- cgit v0.10.2 From 1b46cddfccfec4cc67b187fb53d78198de6a057c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Sep 2009 14:48:46 -0300 Subject: perf tools: Use rb_tree for maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Threads can have many and kernel modules will be represented as a tree of maps as well. Ah, and for a perf.data with 146607 samples: Before: [root@doppio ~]# perf stat -r 5 perf report > /dev/null Performance counter stats for 'perf report' (5 runs): 699.823680 task-clock-msecs # 0.991 CPUs ( +- 0.454% ) 74 context-switches # 0.000 M/sec ( +- 1.709% ) 2 CPU-migrations # 0.000 M/sec ( +- 17.008% ) 23114 page-faults # 0.033 M/sec ( +- 0.000% ) 1381257019 cycles # 1973.721 M/sec ( +- 0.290% ) 1456894438 instructions # 1.055 IPC ( +- 0.007% ) 18779818 cache-references # 26.835 M/sec ( +- 0.380% ) 641799 cache-misses # 0.917 M/sec ( +- 1.200% ) 0.705972729 seconds time elapsed ( +- 0.501% ) [root@doppio ~]# After Performance counter stats for 'perf report' (5 runs): 691.261451 task-clock-msecs # 0.993 CPUs ( +- 0.307% ) 72 context-switches # 0.000 M/sec ( +- 0.829% ) 6 CPU-migrations # 0.000 M/sec ( +- 18.409% ) 23127 page-faults # 0.033 M/sec ( +- 0.000% ) 1366395876 cycles # 1976.670 M/sec ( +- 0.153% ) 1443136016 instructions # 1.056 IPC ( +- 0.012% ) 17956402 cache-references # 25.976 M/sec ( +- 0.325% ) 661924 cache-misses # 0.958 M/sec ( +- 1.335% ) 0.696127275 seconds time elapsed ( +- 0.377% ) I.e. we see some speedup too. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: "H. Peter Anvin" LKML-Reference: <20090928174846.GA3361@ghostprotocols.net> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3a99a9f..055290a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -341,6 +341,7 @@ LIB_H += util/color.h LIB_H += util/values.h LIB_H += util/sort.h LIB_H += util/hist.h +LIB_H += util/thread.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c31a5da..4c69eb5 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -3,7 +3,7 @@ #include "../perf.h" #include "util.h" -#include +#include enum { SHOW_KERNEL = 1, @@ -79,7 +79,7 @@ typedef union event_union { } event_t; struct map { - struct list_head node; + struct rb_node rb_node; u64 start; u64 end; u64 pgoff; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 45efb5d..9d0945c 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -15,7 +15,7 @@ static struct thread *thread__new(pid_t pid) self->comm = malloc(32); if (self->comm) snprintf(self->comm, 32, ":%d", self->pid); - INIT_LIST_HEAD(&self->maps); + self->maps = RB_ROOT; } return self; @@ -31,11 +31,13 @@ int thread__set_comm(struct thread *self, const char *comm) static size_t thread__fprintf(struct thread *self, FILE *fp) { - struct map *pos; + struct rb_node *nd; size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); - list_for_each_entry(pos, &self->maps, node) + for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) { + struct map *pos = rb_entry(nd, struct map, rb_node); ret += map__fprintf(pos, fp); + } return ret; } @@ -93,42 +95,90 @@ register_idle_thread(struct rb_root *threads, struct thread **last_match) return thread; } -void thread__insert_map(struct thread *self, struct map *map) +static void thread__remove_overlappings(struct thread *self, struct map *map) { - struct map *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &self->maps, node) { - if (map__overlap(pos, map)) { - if (verbose >= 2) { - printf("overlapping maps:\n"); - map__fprintf(map, stdout); - map__fprintf(pos, stdout); - } - - if (map->start <= pos->start && map->end > pos->start) - pos->start = map->end; - - if (map->end >= pos->end && map->start < pos->end) - pos->end = map->start; - - if (verbose >= 2) { - printf("after collision:\n"); - map__fprintf(pos, stdout); - } - - if (pos->start >= pos->end) { - list_del_init(&pos->node); - free(pos); - } + struct rb_node *next = rb_first(&self->maps); + + while (next) { + struct map *pos = rb_entry(next, struct map, rb_node); + next = rb_next(&pos->rb_node); + + if (!map__overlap(pos, map)) + continue; + + if (verbose >= 2) { + printf("overlapping maps:\n"); + map__fprintf(map, stdout); + map__fprintf(pos, stdout); + } + + if (map->start <= pos->start && map->end > pos->start) + pos->start = map->end; + + if (map->end >= pos->end && map->start < pos->end) + pos->end = map->start; + + if (verbose >= 2) { + printf("after collision:\n"); + map__fprintf(pos, stdout); + } + + if (pos->start >= pos->end) { + rb_erase(&pos->rb_node, &self->maps); + free(pos); } } +} + +void maps__insert(struct rb_root *maps, struct map *map) +{ + struct rb_node **p = &maps->rb_node; + struct rb_node *parent = NULL; + const u64 ip = map->start; + struct map *m; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node); + if (ip < m->start) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&map->rb_node, parent, p); + rb_insert_color(&map->rb_node, maps); +} + +struct map *maps__find(struct rb_root *maps, u64 ip) +{ + struct rb_node **p = &maps->rb_node; + struct rb_node *parent = NULL; + struct map *m; + + while (*p != NULL) { + parent = *p; + m = rb_entry(parent, struct map, rb_node); + if (ip < m->start) + p = &(*p)->rb_left; + else if (ip > m->end) + p = &(*p)->rb_right; + else + return m; + } + + return NULL; +} - list_add_tail(&map->node, &self->maps); +void thread__insert_map(struct thread *self, struct map *map) +{ + thread__remove_overlappings(self, map); + maps__insert(&self->maps, map); } int thread__fork(struct thread *self, struct thread *parent) { - struct map *map; + struct rb_node *nd; if (self->comm) free(self->comm); @@ -136,7 +186,8 @@ int thread__fork(struct thread *self, struct thread *parent) if (!self->comm) return -ENOMEM; - list_for_each_entry(map, &parent->maps, node) { + for (nd = rb_first(&parent->maps); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); struct map *new = map__clone(map); if (!new) return -ENOMEM; @@ -146,20 +197,6 @@ int thread__fork(struct thread *self, struct thread *parent) return 0; } -struct map *thread__find_map(struct thread *self, u64 ip) -{ - struct map *pos; - - if (self == NULL) - return NULL; - - list_for_each_entry(pos, &self->maps, node) - if (ip >= pos->start && ip <= pos->end) - return pos; - - return NULL; -} - size_t threads__fprintf(FILE *fp, struct rb_root *threads) { size_t ret = 0; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 693ed1e..bbb37c1 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -2,13 +2,12 @@ #define __PERF_THREAD_H #include -#include #include #include "symbol.h" struct thread { struct rb_node rb_node; - struct list_head maps; + struct rb_root maps; pid_t pid; char shortname[3]; char *comm; @@ -21,7 +20,14 @@ struct thread * register_idle_thread(struct rb_root *threads, struct thread **last_match); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); -struct map *thread__find_map(struct thread *self, u64 ip); size_t threads__fprintf(FILE *fp, struct rb_root *threads); +void maps__insert(struct rb_root *maps, struct map *map); +struct map *maps__find(struct rb_root *maps, u64 ip); + +static inline struct map *thread__find_map(struct thread *self, u64 ip) +{ + return self ? maps__find(&self->maps, ip) : NULL; +} + #endif /* __PERF_THREAD_H */ -- cgit v0.10.2 From a80deb622dba7dfb65d9e27b6b74b7c1963c3635 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Sep 2009 15:23:51 -0300 Subject: perf sched: Remove dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several variables are not used at all, cut'n'paste leftovers. Also check if the sample_type is RAW earlier, to avoid needless searches. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index ea9c15c..4470f25 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1544,16 +1544,15 @@ process_raw_event(event_t *raw_event __used, void *more_data, static int process_sample_event(event_t *event, unsigned long offset, unsigned long head) { - char level; - int show = 0; - struct dso *dso = NULL; struct thread *thread; u64 ip = event->ip.ip; u64 timestamp = -1; u32 cpu = -1; u64 period = 1; void *more_data = event->ip.__more_data; - int cpumode; + + if (!(sample_type & PERF_SAMPLE_RAW)) + return 0; thread = threads__findnew(event->ip.pid, &threads, &last_match); @@ -1589,32 +1588,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - if (cpumode == PERF_RECORD_MISC_KERNEL) { - show = SHOW_KERNEL; - level = 'k'; - - dso = kernel_dso; - - dump_printf(" ...... dso: %s\n", dso->name); - - } else if (cpumode == PERF_RECORD_MISC_USER) { - - show = SHOW_USER; - level = '.'; - - } else { - show = SHOW_HV; - level = 'H'; - - dso = hypervisor_dso; - - dump_printf(" ...... dso: [hypervisor]\n"); - } - - if (sample_type & PERF_SAMPLE_RAW) - process_raw_event(event, more_data, cpu, timestamp, thread); + process_raw_event(event, more_data, cpu, timestamp, thread); return 0; } -- cgit v0.10.2 From cad3071424edd7854f63aa80d09473e84f49ed79 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Sep 2009 17:08:18 -0300 Subject: perf trace: Remove dead code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several variables are not used at all, cut'n'paste leftovers. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: "H. Peter Anvin" LKML-Reference: <20090928200818.GF3361@ghostprotocols.net> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e9d256e..2f93888 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -53,16 +53,12 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) static int process_sample_event(event_t *event, unsigned long offset, unsigned long head) { - char level; - int show = 0; - struct dso *dso = NULL; struct thread *thread; u64 ip = event->ip.ip; u64 timestamp = -1; u32 cpu = -1; u64 period = 1; void *more_data = event->ip.__more_data; - int cpumode; thread = threads__findnew(event->ip.pid, &threads, &last_match); @@ -98,30 +94,6 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - if (cpumode == PERF_RECORD_MISC_KERNEL) { - show = SHOW_KERNEL; - level = 'k'; - - dso = kernel_dso; - - dump_printf(" ...... dso: %s\n", dso->name); - - } else if (cpumode == PERF_RECORD_MISC_USER) { - - show = SHOW_USER; - level = '.'; - - } else { - show = SHOW_HV; - level = 'H'; - - dso = hypervisor_dso; - - dump_printf(" ...... dso: [hypervisor]\n"); - } - if (sample_type & PERF_SAMPLE_RAW) { struct { u32 size; -- cgit v0.10.2 From 23acb98de5a4109a60b5fe3f0439389218b039d7 Mon Sep 17 00:00:00 2001 From: Rajiv Andrade Date: Wed, 30 Sep 2009 12:26:55 -0300 Subject: TPM: fix pcrread The previously sent patch: http://marc.info/?l=tpmdd-devel&m=125208945007834&w=2 Had its first hunk cropped when merged, submitting only this first hunk again. Signed-off-by: Jason Gunthorpe Cc: Debora Velarde Cc: Marcel Selhorst Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Rajiv Andrade Acked-by: Mimi Zohar Tested-by: Mimi Zohar Signed-off-by: James Morris diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index aeafac5..f06bb37 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -696,8 +696,7 @@ int __tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) cmd.header.in = pcrread_header; cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx); - BUG_ON(cmd.header.in.length > READ_PCR_RESULT_SIZE); - rc = transmit_cmd(chip, &cmd, cmd.header.in.length, + rc = transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE, "attempting to read a pcr value"); if (rc == 0) -- cgit v0.10.2 From 2ccdc450e658053681202d42ac64b3638f22dc1a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 24 Sep 2009 14:24:00 -0700 Subject: perf top: Remove dead {min,max}_ip unused variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20090924212400.GA15321@ghostprotocols.net> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1ca8889..bf464ce 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -96,9 +96,6 @@ static int display_weighted = -1; * Symbols */ -static u64 min_ip; -static u64 max_ip = -1ll; - struct sym_entry { struct rb_node rb_node; struct list_head node; @@ -826,8 +823,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym) static int parse_symbols(void) { - struct rb_node *node; - struct symbol *sym; int use_modules = vmlinux_name ? 1 : 0; kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); @@ -837,14 +832,6 @@ static int parse_symbols(void) if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0) goto out_delete_dso; - node = rb_first(&kernel_dso->syms); - sym = rb_entry(node, struct symbol, rb_node); - min_ip = sym->start; - - node = rb_last(&kernel_dso->syms); - sym = rb_entry(node, struct symbol, rb_node); - max_ip = sym->end; - if (dump_symtab) dso__fprintf(kernel_dso, stderr); -- cgit v0.10.2 From 4a3127693001c61a21d1ce680db6340623f52e93 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 30 Sep 2009 13:05:23 +0200 Subject: x86: Turn the copy_from_user check into an (optional) compile time warning A previous patch added the buffer size check to copy_from_user(). One of the things learned from analyzing the result of the previous patch is that in general, gcc is really good at proving that the code contains sufficient security checks to not need to do a runtime check. But that for those cases where gcc could not prove this, there was a relatively high percentage of real security issues. This patch turns the case of "gcc cannot prove" into a compile time warning, as long as a sufficiently new gcc is in use that supports this. The objective is that these warnings will trigger developers checking new cases out before a security hole enters a linux kernel release. Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: "David S. Miller" Cc: James Morris Cc: Jan Beulich LKML-Reference: <20090930130523.348ae6c4@infradead.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 582d6ae..952f9e7 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -191,6 +191,13 @@ unsigned long __must_check _copy_from_user(void *to, const void __user *from, unsigned long n); + +extern void copy_from_user_overflow(void) +#ifdef CONFIG_DEBUG_STACKOVERFLOW + __compiletime_warning("copy_from_user() buffer size is not provably correct") +#endif +; + static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) @@ -200,10 +207,9 @@ static inline unsigned long __must_check copy_from_user(void *to, if (likely(sz == -1 || sz >= n)) ret = _copy_from_user(to, from, n); -#ifdef CONFIG_DEBUG_VM else - WARN(1, "Buffer overflow detected!\n"); -#endif + copy_from_user_overflow(); + return ret; } diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 8498684..e218d5d 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -883,3 +883,9 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) return n; } EXPORT_SYMBOL(_copy_from_user); + +void copy_from_user_overflow(void) +{ + WARN(1, "Buffer overflow detected!\n"); +} +EXPORT_SYMBOL(copy_from_user_overflow); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index a3aef5d..f1709c1 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -39,3 +39,6 @@ #endif #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#if __GNUC_MINOR__ >= 4 +#define __compiletime_warning(message) __attribute__((warning(message))) +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8e54108..9503563 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -270,6 +270,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 #endif +#ifndef __compiletime_warning +# define __compiletime_warning(message) +#endif + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v0.10.2 From 439d473b4777de510e1322168ac6f2f377ecd5bc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 2 Oct 2009 03:29:58 -0300 Subject: perf tools: Rewrite and improve support for kernel modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Representing modules as struct map entries, backed by a DSO, etc, using /proc/modules to find where the module is loaded. DSOs now can have a short and long name, so that in verbose mode we can show exactly which .ko or vmlinux image was used. As kernel modules now are a DSO separate from the kernel, we can ask for just the hits for a particular set of kernel modules, just like we can do with shared libraries: [root@doppio linux-2.6-tip]# perf report -n --vmlinux /home/acme/git/build/tip-recvmmsg/vmlinux --modules --dsos \[drm\] | head -15 84.58% 13266 Xorg [k] drm_clflush_pages 4.02% 630 Xorg [k] trace_kmalloc.clone.0 3.95% 619 Xorg [k] drm_ioctl 2.07% 324 Xorg [k] drm_addbufs 1.68% 263 Xorg [k] drm_gem_close_ioctl 0.77% 120 Xorg [k] drm_setmaster_ioctl 0.70% 110 Xorg [k] drm_lastclose 0.68% 106 Xorg [k] drm_open 0.54% 85 Xorg [k] drm_mm_search_free [root@doppio linux-2.6-tip]# Specifying --dsos /lib/modules/2.6.31-tip/kernel/drivers/gpu/drm/drm.ko would have the same effect. Allowing specifying just 'drm.ko' is left for another patch. Processing kallsyms so that per kernel module struct map are instantiated was also left for another patch. That will allow removing the module name from each of its symbols. struct symbol was reduced by removing the ->module backpointer and moving it (well now the map) to struct symbol_entry in perf top, that is its only user right now. The total linecount went down by ~500 lines. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: "H. Peter Anvin" Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Avi Kivity Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 055290a..8e7509f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -336,7 +336,6 @@ LIB_H += util/strlist.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h -LIB_H += util/module.h LIB_H += util/color.h LIB_H += util/values.h LIB_H += util/sort.h @@ -364,7 +363,6 @@ LIB_OBJS += util/usage.o LIB_OBJS += util/wrapper.o LIB_OBJS += util/sigchain.o LIB_OBJS += util/symbol.o -LIB_OBJS += util/module.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o LIB_OBJS += util/header.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index df516dc..7d5a3b1 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -63,6 +63,7 @@ static void hist_hit(struct hist_entry *he, u64 ip) return; sym_size = sym->end - sym->start; + ip = he->map->map_ip(he->map, ip); offset = ip - sym->start; if (offset >= sym_size) @@ -80,7 +81,7 @@ static void hist_hit(struct hist_entry *he, u64 ip) } static int -hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, +hist_entry__add(struct thread *thread, struct map *map, struct symbol *sym, u64 ip, char level) { struct rb_node **p = &hist.rb_node; @@ -89,7 +90,6 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, struct hist_entry entry = { .thread = thread, .map = map, - .dso = dso, .sym = sym, .ip = ip, .level = level, @@ -130,10 +130,10 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; - struct dso *dso = NULL; struct thread *thread; u64 ip = event->ip.ip; struct map *map = NULL; + struct symbol *sym = NULL; thread = threads__findnew(event->ip.pid, &threads, &last_match); @@ -155,32 +155,35 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (event->header.misc & PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; - - dso = kernel_dso; - - dump_printf(" ...... dso: %s\n", dso->name); - + sym = kernel_maps__find_symbol(ip, &map); + dump_printf(" ...... dso: %s\n", + map ? map->dso->long_name : ""); } else if (event->header.misc & PERF_RECORD_MISC_USER) { - show = SHOW_USER; level = '.'; - map = thread__find_map(thread, ip); if (map != NULL) { +got_map: ip = map->map_ip(map, ip); - dso = map->dso; + sym = map->dso->find_symbol(map->dso, ip); } else { /* * If this is outside of all known maps, * and is a negative address, try to look it * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): + * vsyscall or vdso (which executes in user-mode). + * + * XXX This is nasty, we should have a symbol list in + * the "[vdso]" dso, but for now lets use the old + * trick of looking in the whole kernel symbol list. */ - if ((long long)ip < 0) - dso = kernel_dso; + if ((long long)ip < 0) { + map = kernel_map; + goto got_map; + } } - dump_printf(" ...... dso: %s\n", dso ? dso->name : ""); - + dump_printf(" ...... dso: %s\n", + map ? map->dso->long_name : ""); } else { show = SHOW_HV; level = 'H'; @@ -188,12 +191,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } if (show & show_mask) { - struct symbol *sym = NULL; - - if (dso) - sym = dso->find_symbol(dso, ip); - - if (hist_entry__add(thread, map, dso, sym, ip, level)) { + if (hist_entry__add(thread, map, sym, ip, level)) { fprintf(stderr, "problem incrementing symbol count, skipping event\n"); return -1; @@ -313,7 +311,7 @@ process_event(event_t *event, unsigned long offset, unsigned long head) } static int -parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) +parse_line(FILE *file, struct symbol *sym, u64 len) { char *line = NULL, *tmp, *tmp2; static const char *prev_line; @@ -363,7 +361,7 @@ parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) const char *color; struct sym_ext *sym_ext = sym->priv; - offset = line_ip - start; + offset = line_ip - sym->start; if (offset < len) hits = sym->hist[offset]; @@ -442,7 +440,7 @@ static void free_source_line(struct symbol *sym, int len) /* Get the filename:line for the colored entries */ static void -get_source_line(struct symbol *sym, u64 start, int len, const char *filename) +get_source_line(struct symbol *sym, int len, const char *filename) { int i; char cmd[PATH_MAX * 2]; @@ -467,7 +465,7 @@ get_source_line(struct symbol *sym, u64 start, int len, const char *filename) if (sym_ext[i].percent <= 0.5) continue; - offset = start + i; + offset = sym->start + i; sprintf(cmd, "addr2line -e %s %016llx", filename, offset); fp = popen(cmd, "r"); if (!fp) @@ -519,31 +517,23 @@ static void print_summary(const char *filename) static void annotate_sym(struct dso *dso, struct symbol *sym) { - const char *filename = dso->name, *d_filename; - u64 start, end, len; + const char *filename = dso->long_name, *d_filename; + u64 len; char command[PATH_MAX*2]; FILE *file; if (!filename) return; - if (sym->module) - filename = sym->module->path; - else if (dso == kernel_dso) - filename = vmlinux_name; - - start = sym->obj_start; - if (!start) - start = sym->start; + if (full_paths) d_filename = filename; else d_filename = basename(filename); - end = start + sym->end - sym->start + 1; len = sym->end - sym->start; if (print_line) { - get_source_line(sym, start, len, filename); + get_source_line(sym, len, filename); print_summary(filename); } @@ -552,10 +542,11 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) printf("------------------------------------------------\n"); if (verbose >= 2) - printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); + printf("annotating [%p] %30s : [%p] %30s\n", + dso, dso->long_name, sym, sym->name); sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s", - (u64)start, (u64)end, filename, filename); + sym->start, sym->end, filename, filename); if (verbose >= 3) printf("doing: %s\n", command); @@ -565,7 +556,7 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) return; while (!feof(file)) { - if (parse_line(file, sym, start, len) < 0) + if (parse_line(file, sym, len) < 0) break; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c1a54fc..3ed3baf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -349,22 +349,17 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm) static struct symbol * -resolve_symbol(struct thread *thread, struct map **mapp, - struct dso **dsop, u64 *ipp) +resolve_symbol(struct thread *thread, struct map **mapp, u64 *ipp) { - struct dso *dso = dsop ? *dsop : NULL; struct map *map = mapp ? *mapp : NULL; u64 ip = *ipp; - if (!thread) - return NULL; - - if (dso) - goto got_dso; - if (map) goto got_map; + if (!thread) + return NULL; + map = thread__find_map(thread, ip); if (map != NULL) { /* @@ -379,29 +374,29 @@ resolve_symbol(struct thread *thread, struct map **mapp, *mapp = map; got_map: ip = map->map_ip(map, ip); - - dso = map->dso; } else { /* * If this is outside of all known maps, * and is a negative address, try to look it * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): + * vsyscall or vdso (which executes in user-mode). + * + * XXX This is nasty, we should have a symbol list in + * the "[vdso]" dso, but for now lets use the old + * trick of looking in the whole kernel symbol list. */ - if ((long long)ip < 0) - dso = kernel_dso; + if ((long long)ip < 0) { + map = kernel_map; + if (mapp) + *mapp = map; + } } - dump_printf(" ...... dso: %s\n", dso ? dso->name : ""); + dump_printf(" ...... dso: %s\n", + map ? map->dso->long_name : ""); dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip); *ipp = ip; - if (dsop) - *dsop = dso; - - if (!dso) - return NULL; -got_dso: - return dso->find_symbol(dso, ip); + return map ? map->dso->find_symbol(map->dso, ip) : NULL; } static int call__match(struct symbol *sym) @@ -413,7 +408,7 @@ static int call__match(struct symbol *sym) } static struct symbol ** -resolve_callchain(struct thread *thread, struct map *map __used, +resolve_callchain(struct thread *thread, struct map *map, struct ip_callchain *chain, struct hist_entry *entry) { u64 context = PERF_CONTEXT_MAX; @@ -430,8 +425,7 @@ resolve_callchain(struct thread *thread, struct map *map __used, for (i = 0; i < chain->nr; i++) { u64 ip = chain->ips[i]; - struct dso *dso = NULL; - struct symbol *sym; + struct symbol *sym = NULL; if (ip >= PERF_CONTEXT_MAX) { context = ip; @@ -440,17 +434,15 @@ resolve_callchain(struct thread *thread, struct map *map __used, switch (context) { case PERF_CONTEXT_HV: - dso = hypervisor_dso; break; case PERF_CONTEXT_KERNEL: - dso = kernel_dso; + sym = kernel_maps__find_symbol(ip, &map); break; default: + sym = resolve_symbol(thread, &map, &ip); break; } - sym = resolve_symbol(thread, NULL, &dso, &ip); - if (sym) { if (sort__has_parent && call__match(sym) && !entry->parent) @@ -469,7 +461,7 @@ resolve_callchain(struct thread *thread, struct map *map __used, */ static int -hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, +hist_entry__add(struct thread *thread, struct map *map, struct symbol *sym, u64 ip, struct ip_callchain *chain, char level, u64 count) { @@ -480,7 +472,6 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, struct hist_entry entry = { .thread = thread, .map = map, - .dso = dso, .sym = sym, .ip = ip, .level = level, @@ -641,7 +632,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; - struct dso *dso = NULL; + struct symbol *sym = NULL; struct thread *thread; u64 ip = event->ip.ip; u64 period = 1; @@ -700,35 +691,35 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) show = SHOW_KERNEL; level = 'k'; - dso = kernel_dso; - - dump_printf(" ...... dso: %s\n", dso->name); - + sym = kernel_maps__find_symbol(ip, &map); + dump_printf(" ...... dso: %s\n", + map ? map->dso->long_name : ""); } else if (cpumode == PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; + sym = resolve_symbol(thread, &map, &ip); } else { show = SHOW_HV; level = 'H'; - dso = hypervisor_dso; - dump_printf(" ...... dso: [hypervisor]\n"); } if (show & show_mask) { - struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); - - if (dso_list && (!dso || !dso->name || - !strlist__has_entry(dso_list, dso->name))) + if (dso_list && + (!map || !map->dso || + !(strlist__has_entry(dso_list, map->dso->short_name) || + (map->dso->short_name != map->dso->long_name && + strlist__has_entry(dso_list, map->dso->long_name))))) return 0; - if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name))) + if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) return 0; - if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { + if (hist_entry__add(thread, map, sym, ip, + chain, level, period)) { eprintf("problem incrementing symbol count, skipping event\n"); return -1; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bf464ce..befef84 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -22,6 +22,7 @@ #include "util/symbol.h" #include "util/color.h" +#include "util/thread.h" #include "util/util.h" #include #include "util/parse-options.h" @@ -103,6 +104,7 @@ struct sym_entry { unsigned long snap_count; double weight; int skip; + struct map *map; struct source_line *source; struct source_line *lines; struct source_line **lines_tail; @@ -116,12 +118,11 @@ struct sym_entry { static void parse_source(struct sym_entry *syme) { struct symbol *sym; - struct module *module; - struct section *section = NULL; + struct map *map; FILE *file; char command[PATH_MAX*2]; - const char *path = vmlinux_name; - u64 start, end, len; + const char *path; + u64 len; if (!syme) return; @@ -132,27 +133,15 @@ static void parse_source(struct sym_entry *syme) } sym = (struct symbol *)(syme + 1); - module = sym->module; + map = syme->map; + path = map->dso->long_name; - if (module) - path = module->path; - if (!path) - return; - - start = sym->obj_start; - if (!start) - start = sym->start; - - if (module) { - section = module->sections->find_section(module->sections, ".text"); - if (section) - start -= section->vma; - } - - end = start + sym->end - sym->start + 1; len = sym->end - sym->start; - sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path); + sprintf(command, + "objdump --start-address=0x%016Lx " + "--stop-address=0x%016Lx -dS %s", + sym->start, sym->end, path); file = popen(command, "r"); if (!file) @@ -184,13 +173,11 @@ static void parse_source(struct sym_entry *syme) if (strlen(src->line)>8 && src->line[8] == ':') { src->eip = strtoull(src->line, NULL, 16); - if (section) - src->eip += section->vma; + src->eip += map->start; } if (strlen(src->line)>8 && src->line[16] == ':') { src->eip = strtoull(src->line, NULL, 16); - if (section) - src->eip += section->vma; + src->eip += map->start; } } pclose(file); @@ -242,16 +229,9 @@ static void lookup_sym_source(struct sym_entry *syme) struct symbol *symbol = (struct symbol *)(syme + 1); struct source_line *line; char pattern[PATH_MAX]; - char *idx; sprintf(pattern, "<%s>:", symbol->name); - if (symbol->module) { - idx = strstr(pattern, "\t"); - if (idx) - *idx = 0; - } - pthread_mutex_lock(&syme->source_lock); for (line = syme->lines; line; line = line->next) { if (strstr(line->line, pattern)) { @@ -513,8 +493,8 @@ static void print_sym_table(void) if (verbose) printf(" - %016llx", sym->start); printf(" : %s", sym->name); - if (sym->module) - printf("\t[%s]", sym->module->name); + if (syme->map->dso->name[0] == '[') + printf(" \t%s", syme->map->dso->name); printf("\n"); } } @@ -784,7 +764,7 @@ static const char *skip_symbols[] = { NULL }; -static int symbol_filter(struct dso *self, struct symbol *sym) +static int symbol_filter(struct map *map, struct symbol *sym) { struct sym_entry *syme; const char *name = sym->name; @@ -806,7 +786,8 @@ static int symbol_filter(struct dso *self, struct symbol *sym) strstr(name, "_text_end")) return 1; - syme = dso__sym_priv(self, sym); + syme = dso__sym_priv(map->dso, sym); + syme->map = map; pthread_mutex_init(&syme->source_lock, NULL); if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) sym_filter_entry = syme; @@ -825,22 +806,14 @@ static int parse_symbols(void) { int use_modules = vmlinux_name ? 1 : 0; - kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); - if (kernel_dso == NULL) + if (dsos__load_kernel(vmlinux_name, sizeof(struct sym_entry), + symbol_filter, verbose, use_modules) <= 0) return -1; - if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0) - goto out_delete_dso; - if (dump_symtab) - dso__fprintf(kernel_dso, stderr); + dsos__fprintf(stderr); return 0; - -out_delete_dso: - dso__delete(kernel_dso); - kernel_dso = NULL; - return -1; } /* @@ -848,10 +821,11 @@ out_delete_dso: */ static void record_ip(u64 ip, int counter) { - struct symbol *sym = dso__find_symbol(kernel_dso, ip); + struct map *map; + struct symbol *sym = kernel_maps__find_symbol(ip, &map); if (sym != NULL) { - struct sym_entry *syme = dso__sym_priv(kernel_dso, sym); + struct sym_entry *syme = dso__sym_priv(map->dso, sym); if (!syme->skip) { syme->count[counter]++; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 4c69eb5..a39520e 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -3,6 +3,7 @@ #include "../perf.h" #include "util.h" +#include #include enum { @@ -79,7 +80,10 @@ typedef union event_union { } event_t; struct map { - struct rb_node rb_node; + union { + struct rb_node rb_node; + struct list_head node; + }; u64 start; u64 end; u64 pgoff; diff --git a/tools/perf/util/module.c b/tools/perf/util/module.c deleted file mode 100644 index 0d8c85d..0000000 --- a/tools/perf/util/module.c +++ /dev/null @@ -1,545 +0,0 @@ -#include "util.h" -#include "../perf.h" -#include "string.h" -#include "module.h" - -#include -#include -#include -#include -#include -#include - -static unsigned int crc32(const char *p, unsigned int len) -{ - int i; - unsigned int crc = 0; - - while (len--) { - crc ^= *p++; - for (i = 0; i < 8; i++) - crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0); - } - return crc; -} - -/* module section methods */ - -struct sec_dso *sec_dso__new_dso(const char *name) -{ - struct sec_dso *self = malloc(sizeof(*self) + strlen(name) + 1); - - if (self != NULL) { - strcpy(self->name, name); - self->secs = RB_ROOT; - self->find_section = sec_dso__find_section; - } - - return self; -} - -static void sec_dso__delete_section(struct section *self) -{ - free(((void *)self)); -} - -void sec_dso__delete_sections(struct sec_dso *self) -{ - struct section *pos; - struct rb_node *next = rb_first(&self->secs); - - while (next) { - pos = rb_entry(next, struct section, rb_node); - next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &self->secs); - sec_dso__delete_section(pos); - } -} - -void sec_dso__delete_self(struct sec_dso *self) -{ - sec_dso__delete_sections(self); - free(self); -} - -static void sec_dso__insert_section(struct sec_dso *self, struct section *sec) -{ - struct rb_node **p = &self->secs.rb_node; - struct rb_node *parent = NULL; - const u64 hash = sec->hash; - struct section *s; - - while (*p != NULL) { - parent = *p; - s = rb_entry(parent, struct section, rb_node); - if (hash < s->hash) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&sec->rb_node, parent, p); - rb_insert_color(&sec->rb_node, &self->secs); -} - -struct section *sec_dso__find_section(struct sec_dso *self, const char *name) -{ - struct rb_node *n; - u64 hash; - int len; - - if (self == NULL) - return NULL; - - len = strlen(name); - hash = crc32(name, len); - - n = self->secs.rb_node; - - while (n) { - struct section *s = rb_entry(n, struct section, rb_node); - - if (hash < s->hash) - n = n->rb_left; - else if (hash > s->hash) - n = n->rb_right; - else { - if (!strcmp(name, s->name)) - return s; - else - n = rb_next(&s->rb_node); - } - } - - return NULL; -} - -static size_t sec_dso__fprintf_section(struct section *self, FILE *fp) -{ - return fprintf(fp, "name:%s vma:%llx path:%s\n", - self->name, self->vma, self->path); -} - -size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp) -{ - size_t ret = fprintf(fp, "dso: %s\n", self->name); - - struct rb_node *nd; - for (nd = rb_first(&self->secs); nd; nd = rb_next(nd)) { - struct section *pos = rb_entry(nd, struct section, rb_node); - ret += sec_dso__fprintf_section(pos, fp); - } - - return ret; -} - -static struct section *section__new(const char *name, const char *path) -{ - struct section *self = calloc(1, sizeof(*self)); - - if (!self) - goto out_failure; - - self->name = calloc(1, strlen(name) + 1); - if (!self->name) - goto out_failure; - - self->path = calloc(1, strlen(path) + 1); - if (!self->path) - goto out_failure; - - strcpy(self->name, name); - strcpy(self->path, path); - self->hash = crc32(self->name, strlen(name)); - - return self; - -out_failure: - if (self) { - if (self->name) - free(self->name); - if (self->path) - free(self->path); - free(self); - } - - return NULL; -} - -/* module methods */ - -struct mod_dso *mod_dso__new_dso(const char *name) -{ - struct mod_dso *self = malloc(sizeof(*self) + strlen(name) + 1); - - if (self != NULL) { - strcpy(self->name, name); - self->mods = RB_ROOT; - self->find_module = mod_dso__find_module; - } - - return self; -} - -static void mod_dso__delete_module(struct module *self) -{ - free(((void *)self)); -} - -void mod_dso__delete_modules(struct mod_dso *self) -{ - struct module *pos; - struct rb_node *next = rb_first(&self->mods); - - while (next) { - pos = rb_entry(next, struct module, rb_node); - next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &self->mods); - mod_dso__delete_module(pos); - } -} - -void mod_dso__delete_self(struct mod_dso *self) -{ - mod_dso__delete_modules(self); - free(self); -} - -static void mod_dso__insert_module(struct mod_dso *self, struct module *mod) -{ - struct rb_node **p = &self->mods.rb_node; - struct rb_node *parent = NULL; - const u64 hash = mod->hash; - struct module *m; - - while (*p != NULL) { - parent = *p; - m = rb_entry(parent, struct module, rb_node); - if (hash < m->hash) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&mod->rb_node, parent, p); - rb_insert_color(&mod->rb_node, &self->mods); -} - -struct module *mod_dso__find_module(struct mod_dso *self, const char *name) -{ - struct rb_node *n; - u64 hash; - int len; - - if (self == NULL) - return NULL; - - len = strlen(name); - hash = crc32(name, len); - - n = self->mods.rb_node; - - while (n) { - struct module *m = rb_entry(n, struct module, rb_node); - - if (hash < m->hash) - n = n->rb_left; - else if (hash > m->hash) - n = n->rb_right; - else { - if (!strcmp(name, m->name)) - return m; - else - n = rb_next(&m->rb_node); - } - } - - return NULL; -} - -static size_t mod_dso__fprintf_module(struct module *self, FILE *fp) -{ - return fprintf(fp, "name:%s path:%s\n", self->name, self->path); -} - -size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp) -{ - struct rb_node *nd; - size_t ret; - - ret = fprintf(fp, "dso: %s\n", self->name); - - for (nd = rb_first(&self->mods); nd; nd = rb_next(nd)) { - struct module *pos = rb_entry(nd, struct module, rb_node); - - ret += mod_dso__fprintf_module(pos, fp); - } - - return ret; -} - -static struct module *module__new(const char *name, const char *path) -{ - struct module *self = calloc(1, sizeof(*self)); - - if (!self) - goto out_failure; - - self->name = calloc(1, strlen(name) + 1); - if (!self->name) - goto out_failure; - - self->path = calloc(1, strlen(path) + 1); - if (!self->path) - goto out_failure; - - strcpy(self->name, name); - strcpy(self->path, path); - self->hash = crc32(self->name, strlen(name)); - - return self; - -out_failure: - if (self) { - if (self->name) - free(self->name); - if (self->path) - free(self->path); - free(self); - } - - return NULL; -} - -static int mod_dso__load_sections(struct module *mod) -{ - int count = 0, path_len; - struct dirent *entry; - char *line = NULL; - char *dir_path; - DIR *dir; - size_t n; - - path_len = strlen("/sys/module/"); - path_len += strlen(mod->name); - path_len += strlen("/sections/"); - - dir_path = calloc(1, path_len + 1); - if (dir_path == NULL) - goto out_failure; - - strcat(dir_path, "/sys/module/"); - strcat(dir_path, mod->name); - strcat(dir_path, "/sections/"); - - dir = opendir(dir_path); - if (dir == NULL) - goto out_free; - - while ((entry = readdir(dir))) { - struct section *section; - char *path, *vma; - int line_len; - FILE *file; - - if (!strcmp(".", entry->d_name) || !strcmp("..", entry->d_name)) - continue; - - path = calloc(1, path_len + strlen(entry->d_name) + 1); - if (path == NULL) - break; - strcat(path, dir_path); - strcat(path, entry->d_name); - - file = fopen(path, "r"); - if (file == NULL) { - free(path); - break; - } - - line_len = getline(&line, &n, file); - if (line_len < 0) { - free(path); - fclose(file); - break; - } - - if (!line) { - free(path); - fclose(file); - break; - } - - line[--line_len] = '\0'; /* \n */ - - vma = strstr(line, "0x"); - if (!vma) { - free(path); - fclose(file); - break; - } - vma += 2; - - section = section__new(entry->d_name, path); - if (!section) { - fprintf(stderr, "load_sections: allocation error\n"); - free(path); - fclose(file); - break; - } - - hex2u64(vma, §ion->vma); - sec_dso__insert_section(mod->sections, section); - - free(path); - fclose(file); - count++; - } - - closedir(dir); - free(line); - free(dir_path); - - return count; - -out_free: - free(dir_path); - -out_failure: - return count; -} - -static int mod_dso__load_module_paths(struct mod_dso *self) -{ - struct utsname uts; - int count = 0, len, err = -1; - char *line = NULL; - FILE *file; - char *dpath, *dir; - size_t n; - - if (uname(&uts) < 0) - return err; - - len = strlen("/lib/modules/"); - len += strlen(uts.release); - len += strlen("/modules.dep"); - - dpath = calloc(1, len + 1); - if (dpath == NULL) - return err; - - strcat(dpath, "/lib/modules/"); - strcat(dpath, uts.release); - strcat(dpath, "/modules.dep"); - - file = fopen(dpath, "r"); - if (file == NULL) - goto out_failure; - - dir = dirname(dpath); - if (!dir) - goto out_failure; - strcat(dir, "/"); - - while (!feof(file)) { - struct module *module; - char *name, *path, *tmp; - FILE *modfile; - int line_len; - - line_len = getline(&line, &n, file); - if (line_len < 0) - break; - - if (!line) - break; - - line[--line_len] = '\0'; /* \n */ - - path = strchr(line, ':'); - if (!path) - break; - *path = '\0'; - - path = strdup(line); - if (!path) - break; - - if (!strstr(path, dir)) { - if (strncmp(path, "kernel/", 7)) - break; - - free(path); - path = calloc(1, strlen(dir) + strlen(line) + 1); - if (!path) - break; - strcat(path, dir); - strcat(path, line); - } - - modfile = fopen(path, "r"); - if (modfile == NULL) - break; - fclose(modfile); - - name = strdup(path); - if (!name) - break; - - name = strtok(name, "/"); - tmp = name; - - while (tmp) { - tmp = strtok(NULL, "/"); - if (tmp) - name = tmp; - } - - name = strsep(&name, "."); - if (!name) - break; - - /* Quirk: replace '-' with '_' in all modules */ - for (len = strlen(name); len; len--) { - if (*(name+len) == '-') - *(name+len) = '_'; - } - - module = module__new(name, path); - if (!module) - break; - mod_dso__insert_module(self, module); - - module->sections = sec_dso__new_dso("sections"); - if (!module->sections) - break; - - module->active = mod_dso__load_sections(module); - - if (module->active > 0) - count++; - } - - if (feof(file)) - err = count; - else - fprintf(stderr, "load_module_paths: modules.dep parsing failure!\n"); - -out_failure: - if (dpath) - free(dpath); - if (file) - fclose(file); - if (line) - free(line); - - return err; -} - -int mod_dso__load_modules(struct mod_dso *dso) -{ - int err; - - err = mod_dso__load_module_paths(dso); - - return err; -} diff --git a/tools/perf/util/module.h b/tools/perf/util/module.h deleted file mode 100644 index 098e041..0000000 --- a/tools/perf/util/module.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef __PERF_MODULE_ -#define __PERF_MODULE_ 1 - -#include -#include "../types.h" -#include -#include - -struct section { - struct rb_node rb_node; - u64 hash; - u64 vma; - char *name; - char *path; -}; - -struct sec_dso { - struct list_head node; - struct rb_root secs; - struct section *(*find_section)(struct sec_dso *, const char *name); - char name[0]; -}; - -struct module { - struct rb_node rb_node; - u64 hash; - char *name; - char *path; - struct sec_dso *sections; - int active; -}; - -struct mod_dso { - struct list_head node; - struct rb_root mods; - struct module *(*find_module)(struct mod_dso *, const char *name); - char name[0]; -}; - -struct sec_dso *sec_dso__new_dso(const char *name); -void sec_dso__delete_sections(struct sec_dso *self); -void sec_dso__delete_self(struct sec_dso *self); -size_t sec_dso__fprintf(struct sec_dso *self, FILE *fp); -struct section *sec_dso__find_section(struct sec_dso *self, const char *name); - -struct mod_dso *mod_dso__new_dso(const char *name); -void mod_dso__delete_modules(struct mod_dso *self); -void mod_dso__delete_self(struct mod_dso *self); -size_t mod_dso__fprintf(struct mod_dso *self, FILE *fp); -struct module *mod_dso__find_module(struct mod_dso *self, const char *name); -int mod_dso__load_modules(struct mod_dso *dso); - -#endif /* __PERF_MODULE_ */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 50e75ab..40c9acd 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -129,20 +129,32 @@ sort__comm_print(FILE *fp, struct hist_entry *self, unsigned int width) int64_t sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) { - struct dso *dso_l = left->dso; - struct dso *dso_r = right->dso; + struct dso *dso_l = left->map ? left->map->dso : NULL; + struct dso *dso_r = right->map ? right->map->dso : NULL; + const char *dso_name_l, *dso_name_r; if (!dso_l || !dso_r) return cmp_null(dso_l, dso_r); - return strcmp(dso_l->name, dso_r->name); + if (verbose) { + dso_name_l = dso_l->long_name; + dso_name_r = dso_r->long_name; + } else { + dso_name_l = dso_l->short_name; + dso_name_r = dso_r->short_name; + } + + return strcmp(dso_name_l, dso_name_r); } size_t sort__dso_print(FILE *fp, struct hist_entry *self, unsigned int width) { - if (self->dso) - return repsep_fprintf(fp, "%-*s", width, self->dso->name); + if (self->map && self->map->dso) { + const char *dso_name = !verbose ? self->map->dso->short_name : + self->map->dso->long_name; + return repsep_fprintf(fp, "%-*s", width, dso_name); + } return repsep_fprintf(fp, "%*llx", width, (u64)self->ip); } @@ -169,20 +181,16 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used) { size_t ret = 0; - if (verbose) - ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, - dso__symtab_origin(self->dso)); + if (verbose) { + char o = self->map ? dso__symtab_origin(self->map->dso) : '!'; + ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip, o); + } ret += repsep_fprintf(fp, "[%c] ", self->level); - if (self->sym) { + if (self->sym) ret += repsep_fprintf(fp, "%s", self->sym->name); - - if (self->sym->module) - ret += repsep_fprintf(fp, "\t[%s]", - self->sym->module->name); - } else { + else ret += repsep_fprintf(fp, "%#016llx", (u64)self->ip); - } return ret; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 4684fd6..13806d7 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -42,18 +42,15 @@ extern unsigned int threads__col_width; struct hist_entry { struct rb_node rb_node; - + u64 count; struct thread *thread; struct map *map; - struct dso *dso; struct symbol *sym; - struct symbol *parent; u64 ip; char level; + struct symbol *parent; struct callchain_node callchain; struct rb_root sorted_chain; - - u64 count; }; /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 559fb06..e882968 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2,12 +2,14 @@ #include "../perf.h" #include "string.h" #include "symbol.h" +#include "thread.h" #include "debug.h" #include #include #include +#include const char *sym_hist_filter; @@ -18,12 +20,15 @@ enum dso_origin { DSO__ORIG_UBUNTU, DSO__ORIG_BUILDID, DSO__ORIG_DSO, + DSO__ORIG_KMODULE, DSO__ORIG_NOT_FOUND, }; -static struct symbol *symbol__new(u64 start, u64 len, - const char *name, unsigned int priv_size, - u64 obj_start, int v) +static void dsos__add(struct dso *dso); +static struct dso *dsos__find(const char *name); + +static struct symbol *symbol__new(u64 start, u64 len, const char *name, + unsigned int priv_size, int v) { size_t namelen = strlen(name) + 1; struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); @@ -32,10 +37,9 @@ static struct symbol *symbol__new(u64 start, u64 len, return NULL; if (v >= 2) - printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", - (u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); + printf("new symbol: %016Lx [%08lx]: %s, hist: %p\n", + start, (unsigned long)len, name, self->hist); - self->obj_start= obj_start; self->hist = NULL; self->hist_sum = 0; @@ -60,12 +64,8 @@ static void symbol__delete(struct symbol *self, unsigned int priv_size) static size_t symbol__fprintf(struct symbol *self, FILE *fp) { - if (!self->module) - return fprintf(fp, " %llx-%llx %s\n", + return fprintf(fp, " %llx-%llx %s\n", self->start, self->end, self->name); - else - return fprintf(fp, " %llx-%llx %s \t[%s]\n", - self->start, self->end, self->name, self->module->name); } struct dso *dso__new(const char *name, unsigned int sym_priv_size) @@ -74,6 +74,8 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size) if (self != NULL) { strcpy(self->name, name); + self->long_name = self->name; + self->short_name = self->name; self->syms = RB_ROOT; self->sym_priv_size = sym_priv_size; self->find_symbol = dso__find_symbol; @@ -100,6 +102,8 @@ static void dso__delete_symbols(struct dso *self) void dso__delete(struct dso *self) { dso__delete_symbols(self); + if (self->long_name != self->name) + free(self->long_name); free(self); } @@ -147,7 +151,7 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip) size_t dso__fprintf(struct dso *self, FILE *fp) { - size_t ret = fprintf(fp, "dso: %s\n", self->name); + size_t ret = fprintf(fp, "dso: %s\n", self->long_name); struct rb_node *nd; for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { @@ -158,7 +162,8 @@ size_t dso__fprintf(struct dso *self, FILE *fp) return ret; } -static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v) +static int dso__load_kallsyms(struct dso *self, struct map *map, + symbol_filter_t filter, int v) { struct rb_node *nd, *prevnd; char *line = NULL; @@ -200,12 +205,12 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v) * Well fix up the end later, when we have all sorted. */ sym = symbol__new(start, 0xdead, line + len + 2, - self->sym_priv_size, 0, v); + self->sym_priv_size, v); if (sym == NULL) goto out_delete_line; - if (filter && filter(self, sym)) + if (filter && filter(map, sym)) symbol__delete(sym, self->sym_priv_size); else { dso__insert_symbol(self, sym); @@ -241,14 +246,15 @@ out_failure: return -1; } -static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int v) +static int dso__load_perf_map(struct dso *self, struct map *map, + symbol_filter_t filter, int v) { char *line = NULL; size_t n; FILE *file; int nr_syms = 0; - file = fopen(self->name, "r"); + file = fopen(self->long_name, "r"); if (file == NULL) goto out_failure; @@ -279,12 +285,12 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int v) continue; sym = symbol__new(start, size, line + len, - self->sym_priv_size, start, v); + self->sym_priv_size, v); if (sym == NULL) goto out_delete_line; - if (filter && filter(self, sym)) + if (filter && filter(map, sym)) symbol__delete(sym, self->sym_priv_size); else { dso__insert_symbol(self, sym); @@ -410,7 +416,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v) Elf *elf; int nr = 0, symidx, fd, err = 0; - fd = open(self->name, O_RDONLY); + fd = open(self->long_name, O_RDONLY); if (fd < 0) goto out; @@ -478,7 +484,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, 0, v); + sympltname, self->sym_priv_size, v); if (!f) goto out_elf_end; @@ -496,7 +502,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, 0, v); + sympltname, self->sym_priv_size, v); if (!f) goto out_elf_end; @@ -515,12 +521,13 @@ out_close: return nr; out: fprintf(stderr, "%s: problems reading %s PLT info.\n", - __func__, self->name); + __func__, self->long_name); return 0; } -static int dso__load_sym(struct dso *self, int fd, const char *name, - symbol_filter_t filter, int v, struct module *mod) +static int dso__load_sym(struct dso *self, struct map *map, const char *name, + int fd, symbol_filter_t filter, int kernel, + int kmodule, int v) { Elf_Data *symstrs, *secstrs; uint32_t nr_syms; @@ -532,7 +539,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, GElf_Sym sym; Elf_Scn *sec, *sec_strndx; Elf *elf; - int nr = 0, kernel = !strcmp("[kernel]", self->name); + int nr = 0; elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (elf == NULL) { @@ -589,8 +596,6 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, struct symbol *f; const char *elf_name; char *demangled; - u64 obj_start; - struct section *section = NULL; int is_label = elf_sym__is_label(&sym); const char *section_name; @@ -607,7 +612,6 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, continue; section_name = elf_sec__name(&shdr, secstrs); - obj_start = sym.st_value; if (self->adjust_symbols) { if (v >= 2) @@ -615,18 +619,8 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; - } - - if (mod) { - section = mod->sections->find_section(mod->sections, section_name); - if (section) - sym.st_value += section->vma; - else { - fprintf(stderr, "dso__load_sym() module %s lookup of %s failed\n", - mod->name, section_name); - goto out_elf_end; - } - } + } else if (kmodule) + sym.st_value += shdr.sh_offset; /* * We need to figure out if the object was created from C++ sources * DWARF DW_compile_unit has this, but we don't always have access @@ -638,15 +632,14 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, elf_name = demangled; f = symbol__new(sym.st_value, sym.st_size, elf_name, - self->sym_priv_size, obj_start, v); + self->sym_priv_size, v); free(demangled); if (!f) goto out_elf_end; - if (filter && filter(self, f)) + if (filter && filter(map, f)) symbol__delete(f, self->sym_priv_size); else { - f->module = mod; dso__insert_symbol(self, f); nr++; } @@ -671,7 +664,7 @@ static char *dso__read_build_id(struct dso *self, int v) char *build_id = NULL, *bid; unsigned char *raw; Elf *elf; - int fd = open(self->name, O_RDONLY); + int fd = open(self->long_name, O_RDONLY); if (fd < 0) goto out; @@ -680,7 +673,7 @@ static char *dso__read_build_id(struct dso *self, int v) if (elf == NULL) { if (v) fprintf(stderr, "%s: cannot read %s ELF file.\n", - __func__, self->name); + __func__, self->long_name); goto out_close; } @@ -709,7 +702,7 @@ static char *dso__read_build_id(struct dso *self, int v) bid += 2; } if (v >= 2) - printf("%s(%s): %s\n", __func__, self->name, build_id); + printf("%s(%s): %s\n", __func__, self->long_name, build_id); out_elf_end: elf_end(elf); out_close: @@ -727,6 +720,7 @@ char dso__symtab_origin(const struct dso *self) [DSO__ORIG_UBUNTU] = 'u', [DSO__ORIG_BUILDID] = 'b', [DSO__ORIG_DSO] = 'd', + [DSO__ORIG_KMODULE] = 'K', }; if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND) @@ -734,7 +728,7 @@ char dso__symtab_origin(const struct dso *self) return origin[self->origin]; } -int dso__load(struct dso *self, symbol_filter_t filter, int v) +int dso__load(struct dso *self, struct map *map, symbol_filter_t filter, int v) { int size = PATH_MAX; char *name = malloc(size), *build_id = NULL; @@ -747,7 +741,7 @@ int dso__load(struct dso *self, symbol_filter_t filter, int v) self->adjust_symbols = 0; if (strncmp(self->name, "/tmp/perf-", 10) == 0) { - ret = dso__load_perf_map(self, filter, v); + ret = dso__load_perf_map(self, map, filter, v); self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : DSO__ORIG_NOT_FOUND; return ret; @@ -760,10 +754,12 @@ more: self->origin++; switch (self->origin) { case DSO__ORIG_FEDORA: - snprintf(name, size, "/usr/lib/debug%s.debug", self->name); + snprintf(name, size, "/usr/lib/debug%s.debug", + self->long_name); break; case DSO__ORIG_UBUNTU: - snprintf(name, size, "/usr/lib/debug%s", self->name); + snprintf(name, size, "/usr/lib/debug%s", + self->long_name); break; case DSO__ORIG_BUILDID: build_id = dso__read_build_id(self, v); @@ -777,7 +773,7 @@ more: self->origin++; /* Fall thru */ case DSO__ORIG_DSO: - snprintf(name, size, "%s", self->name); + snprintf(name, size, "%s", self->long_name); break; default: @@ -787,7 +783,7 @@ more: fd = open(name, O_RDONLY); } while (fd < 0); - ret = dso__load_sym(self, fd, name, filter, v, NULL); + ret = dso__load_sym(self, map, name, fd, filter, 0, 0, v); close(fd); /* @@ -808,89 +804,247 @@ out: return ret; } -static int dso__load_module(struct dso *self, struct mod_dso *mods, const char *name, - symbol_filter_t filter, int v) +static struct rb_root kernel_maps; +struct map *kernel_map; + +static void kernel_maps__insert(struct map *map) { - struct module *mod = mod_dso__find_module(mods, name); - int err = 0, fd; + maps__insert(&kernel_maps, map); +} - if (mod == NULL || !mod->active) - return err; +struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp) +{ + /* + * We can't have kernel_map in kernel_maps because it spans an address + * space that includes the modules. The right way to fix this is to + * create several maps, so that we don't have overlapping ranges with + * modules. For now lets look first on the kernel dso. + */ + struct map *map = maps__find(&kernel_maps, ip); + struct symbol *sym; + + if (map) { + ip = map->map_ip(map, ip); + sym = map->dso->find_symbol(map->dso, ip); + } else { + map = kernel_map; + sym = map->dso->find_symbol(map->dso, ip); + } - fd = open(mod->path, O_RDONLY); + if (mapp) + *mapp = map; - if (fd < 0) + return sym; +} + +struct map *kernel_maps__find_by_dso_name(const char *name) +{ + struct rb_node *nd; + + for (nd = rb_first(&kernel_maps); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); + + if (map->dso && strcmp(map->dso->name, name) == 0) + return map; + } + + return NULL; +} + +static int dso__load_module_sym(struct dso *self, struct map *map, + symbol_filter_t filter, int v) +{ + int err = 0, fd = open(self->long_name, O_RDONLY); + + if (fd < 0) { + if (v) + fprintf(stderr, "%s: cannot open %s\n", + __func__, self->long_name); return err; + } - err = dso__load_sym(self, fd, name, filter, v, mod); + err = dso__load_sym(self, map, self->long_name, fd, filter, 0, 1, v); close(fd); return err; } -int dso__load_modules(struct dso *self, symbol_filter_t filter, int v) +static int dsos__load_modules_sym_dir(char *dirname, + symbol_filter_t filter, int v) { - struct mod_dso *mods = mod_dso__new_dso("modules"); - struct module *pos; - struct rb_node *next; - int err, count = 0; + struct dirent *dent; + int nr_symbols = 0, err; + DIR *dir = opendir(dirname); - err = mod_dso__load_modules(mods); + if (!dir) { + if (v) + fprintf(stderr, "%s: cannot open %s dir\n", __func__, + dirname); + return -1; + } - if (err <= 0) - return err; + while ((dent = readdir(dir)) != NULL) { + char path[PATH_MAX]; + + if (dent->d_type == DT_DIR) { + if (!strcmp(dent->d_name, ".") || + !strcmp(dent->d_name, "..")) + continue; + + snprintf(path, sizeof(path), "%s/%s", + dirname, dent->d_name); + err = dsos__load_modules_sym_dir(path, filter, v); + if (err < 0) + goto failure; + } else { + char *dot = strrchr(dent->d_name, '.'), + dso_name[PATH_MAX]; + struct map *map; + struct rb_node *last; + + if (dot == NULL || strcmp(dot, ".ko")) + continue; + snprintf(dso_name, sizeof(dso_name), "[%.*s]", + (int)(dot - dent->d_name), dent->d_name); + + map = kernel_maps__find_by_dso_name(dso_name); + if (map == NULL) + continue; + + snprintf(path, sizeof(path), "%s/%s", + dirname, dent->d_name); + + map->dso->long_name = strdup(path); + if (map->dso->long_name == NULL) + goto failure; + + err = dso__load_module_sym(map->dso, map, filter, v); + if (err < 0) + goto failure; + last = rb_last(&map->dso->syms); + if (last) { + struct symbol *sym; + sym = rb_entry(last, struct symbol, rb_node); + map->end = map->start + sym->end; + } + } + nr_symbols += err; + } - /* - * Iterate over modules, and load active symbols. - */ - next = rb_first(&mods->mods); - while (next) { - pos = rb_entry(next, struct module, rb_node); - err = dso__load_module(self, mods, pos->name, filter, v); + return nr_symbols; +failure: + closedir(dir); + return -1; +} - if (err < 0) - break; +static int dsos__load_modules_sym(symbol_filter_t filter, int v) +{ + struct utsname uts; + char modules_path[PATH_MAX]; - next = rb_next(&pos->rb_node); - count += err; - } + if (uname(&uts) < 0) + return -1; - if (err < 0) { - mod_dso__delete_modules(mods); - mod_dso__delete_self(mods); - return err; - } + snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", + uts.release); - return count; + return dsos__load_modules_sym_dir(modules_path, filter, v); } -static inline void dso__fill_symbol_holes(struct dso *self) +/* + * Constructor variant for modules (where we know from /proc/modules where + * they are loaded) and for vmlinux, where only after we load all the + * symbols we'll know where it starts and ends. + */ +static struct map *map__new2(u64 start, struct dso *dso) { - struct symbol *prev = NULL; - struct rb_node *nd; + struct map *self = malloc(sizeof(*self)); - for (nd = rb_last(&self->syms); nd; nd = rb_prev(nd)) { - struct symbol *pos = rb_entry(nd, struct symbol, rb_node); + if (self != NULL) { + self->start = start; + /* + * Will be filled after we load all the symbols + */ + self->end = 0; + + self->pgoff = 0; + self->dso = dso; + self->map_ip = map__map_ip; + RB_CLEAR_NODE(&self->rb_node); + } + return self; +} + +int dsos__load_modules(unsigned int sym_priv_size, + symbol_filter_t filter, int v) +{ + char *line = NULL; + size_t n; + FILE *file = fopen("/proc/modules", "r"); + struct map *map; - if (prev) { - u64 hole = 0; - int alias = pos->start == prev->start; + if (file == NULL) + return -1; - if (!alias) - hole = prev->start - pos->end - 1; + while (!feof(file)) { + char name[PATH_MAX]; + u64 start; + struct dso *dso; + char *sep; + int line_len; - if (hole || alias) { - if (alias) - pos->end = prev->end; - else if (hole) - pos->end = prev->start - 1; - } + line_len = getline(&line, &n, file); + if (line_len < 0) + break; + + if (!line) + goto out_failure; + + line[--line_len] = '\0'; /* \n */ + + sep = strrchr(line, 'x'); + if (sep == NULL) + continue; + + hex2u64(sep + 1, &start); + + sep = strchr(line, ' '); + if (sep == NULL) + continue; + + *sep = '\0'; + + snprintf(name, sizeof(name), "[%s]", line); + dso = dso__new(name, sym_priv_size); + + if (dso == NULL) + goto out_delete_line; + + map = map__new2(start, dso); + if (map == NULL) { + dso__delete(dso); + goto out_delete_line; } - prev = pos; + + dso->origin = DSO__ORIG_KMODULE; + kernel_maps__insert(map); + dsos__add(dso); } + + free(line); + fclose(file); + + v = 1; + return dsos__load_modules_sym(filter, v); + +out_delete_line: + free(line); +out_failure: + return -1; } -static int dso__load_vmlinux(struct dso *self, const char *vmlinux, +static int dso__load_vmlinux(struct dso *self, struct map *map, + const char *vmlinux, symbol_filter_t filter, int v) { int err, fd = open(vmlinux, O_RDONLY); @@ -898,28 +1052,36 @@ static int dso__load_vmlinux(struct dso *self, const char *vmlinux, if (fd < 0) return -1; - err = dso__load_sym(self, fd, vmlinux, filter, v, NULL); - - if (err > 0) - dso__fill_symbol_holes(self); + err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0, v); close(fd); return err; } -int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int v, int use_modules) +int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, + symbol_filter_t filter, int v, int use_modules) { int err = -1; + struct dso *dso = dso__new(vmlinux, sym_priv_size); + + if (dso == NULL) + return -1; + + dso->short_name = "[kernel]"; + kernel_map = map__new2(0, dso); + if (kernel_map == NULL) + goto out_delete_dso; + + kernel_map->map_ip = vdso__map_ip; if (vmlinux) { - err = dso__load_vmlinux(self, vmlinux, filter, v); + err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter, v); if (err > 0 && use_modules) { - int syms = dso__load_modules(self, filter, v); + int syms = dsos__load_modules(sym_priv_size, filter, v); if (syms < 0) { - fprintf(stderr, "dso__load_modules failed!\n"); + fprintf(stderr, "dsos__load_modules failed!\n"); return syms; } err += syms; @@ -927,18 +1089,34 @@ int dso__load_kernel(struct dso *self, const char *vmlinux, } if (err <= 0) - err = dso__load_kallsyms(self, filter, v); + err = dso__load_kallsyms(dso, kernel_map, filter, v); + + if (err > 0) { + struct rb_node *node = rb_first(&dso->syms); + struct symbol *sym = rb_entry(node, struct symbol, rb_node); - if (err > 0) - self->origin = DSO__ORIG_KERNEL; + kernel_map->start = sym->start; + node = rb_last(&dso->syms); + sym = rb_entry(node, struct symbol, rb_node); + kernel_map->end = sym->end; + + dso->origin = DSO__ORIG_KERNEL; + /* + * XXX See kernel_maps__find_symbol comment + * kernel_maps__insert(kernel_map) + */ + dsos__add(dso); + } return err; + +out_delete_dso: + dso__delete(dso); + return -1; } LIST_HEAD(dsos); -struct dso *kernel_dso; struct dso *vdso; -struct dso *hypervisor_dso; const char *vmlinux_name = "vmlinux"; int modules; @@ -970,7 +1148,7 @@ struct dso *dsos__findnew(const char *name) if (!dso) goto out_delete_dso; - nr = dso__load(dso, NULL, verbose); + nr = dso__load(dso, NULL, NULL, verbose); if (nr < 0) { eprintf("Failed to open: %s\n", name); goto out_delete_dso; @@ -995,43 +1173,20 @@ void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) -{ - return dso__find_symbol(dso, ip); -} - int load_kernel(void) { - int err; - - kernel_dso = dso__new("[kernel]", 0); - if (!kernel_dso) + if (dsos__load_kernel(vmlinux_name, 0, NULL, verbose, modules) <= 0) return -1; - err = dso__load_kernel(kernel_dso, vmlinux_name, NULL, verbose, modules); - if (err <= 0) { - dso__delete(kernel_dso); - kernel_dso = NULL; - } else - dsos__add(kernel_dso); - vdso = dso__new("[vdso]", 0); if (!vdso) return -1; - vdso->find_symbol = vdso__find_symbol; - dsos__add(vdso); - hypervisor_dso = dso__new("[hypervisor]", 0); - if (!hypervisor_dso) - return -1; - dsos__add(hypervisor_dso); - - return err; + return 0; } - void symbol__init(void) { elf_version(EV_CURRENT); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ee164f6..5339fd8 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,7 +5,6 @@ #include "types.h" #include #include -#include "module.h" #include "event.h" #ifdef HAVE_CPLUS_DEMANGLE @@ -36,10 +35,8 @@ struct symbol { struct rb_node rb_node; u64 start; u64 end; - u64 obj_start; u64 hist_sum; u64 *hist; - struct module *module; void *priv; char name[0]; }; @@ -52,12 +49,14 @@ struct dso { unsigned char adjust_symbols; unsigned char slen_calculated; unsigned char origin; + const char *short_name; + char *long_name; char name[0]; }; extern const char *sym_hist_filter; -typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym); +typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); struct dso *dso__new(const char *name, unsigned int sym_priv_size); void dso__delete(struct dso *self); @@ -69,10 +68,12 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) struct symbol *dso__find_symbol(struct dso *self, u64 ip); -int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose, int modules); -int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose); -int dso__load(struct dso *self, symbol_filter_t filter, int verbose); +int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, + symbol_filter_t filter, int verbose, int modules); +int dsos__load_modules(unsigned int sym_priv_size, symbol_filter_t filter, + int verbose); +int dso__load(struct dso *self, struct map *map, symbol_filter_t filter, + int verbose); struct dso *dsos__findnew(const char *name); void dsos__fprintf(FILE *fp); @@ -84,9 +85,8 @@ int load_kernel(void); void symbol__init(void); extern struct list_head dsos; -extern struct dso *kernel_dso; +extern struct map *kernel_map; extern struct dso *vdso; -extern struct dso *hypervisor_dso; extern const char *vmlinux_name; extern int modules; #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9d0945c..3b56aeb 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -16,6 +16,7 @@ static struct thread *thread__new(pid_t pid) if (self->comm) snprintf(self->comm, 32, ":%d", self->pid); self->maps = RB_ROOT; + INIT_LIST_HEAD(&self->removed_maps); } return self; @@ -32,13 +33,20 @@ int thread__set_comm(struct thread *self, const char *comm) static size_t thread__fprintf(struct thread *self, FILE *fp) { struct rb_node *nd; - size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); + struct map *pos; + size_t ret = fprintf(fp, "Thread %d %s\nCurrent maps:\n", + self->pid, self->comm); for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) { - struct map *pos = rb_entry(nd, struct map, rb_node); + pos = rb_entry(nd, struct map, rb_node); ret += map__fprintf(pos, fp); } + ret = fprintf(fp, "Removed maps:\n"); + + list_for_each_entry(pos, &self->removed_maps, node) + ret += map__fprintf(pos, fp); + return ret; } @@ -112,21 +120,13 @@ static void thread__remove_overlappings(struct thread *self, struct map *map) map__fprintf(pos, stdout); } - if (map->start <= pos->start && map->end > pos->start) - pos->start = map->end; - - if (map->end >= pos->end && map->start < pos->end) - pos->end = map->start; - - if (verbose >= 2) { - printf("after collision:\n"); - map__fprintf(pos, stdout); - } - - if (pos->start >= pos->end) { - rb_erase(&pos->rb_node, &self->maps); - free(pos); - } + rb_erase(&pos->rb_node, &self->maps); + /* + * We may have references to this map, for instance in some + * hist_entry instances, so just move them to a separate + * list. + */ + list_add_tail(&pos->node, &self->removed_maps); } } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index bbb37c1..845d9b6 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -8,6 +8,7 @@ struct thread { struct rb_node rb_node; struct rb_root maps; + struct list_head removed_maps; pid_t pid; char shortname[3]; char *comm; @@ -25,6 +26,9 @@ size_t threads__fprintf(FILE *fp, struct rb_root *threads); void maps__insert(struct rb_root *maps, struct map *map); struct map *maps__find(struct rb_root *maps, u64 ip); +struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp); +struct map *kernel_maps__find_by_dso_name(const char *name); + static inline struct map *thread__find_map(struct thread *self, u64 ip) { return self ? maps__find(&self->maps, ip) : NULL; -- cgit v0.10.2 From 63312b6a6faae3f2e5577f2b001e3b504f10a2aa Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 2 Oct 2009 07:50:50 -0700 Subject: x86: Add a Kconfig option to turn the copy_from_user warnings into errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For automated testing it is useful to have the option to turn the warnings on copy_from_user() etc checks into errors: In function ‘copy_from_user’, inlined from ‘fd_copyin’ at drivers/block/floppy.c:3080, inlined from ‘fd_ioctl’ at drivers/block/floppy.c:3503: linux/arch/x86/include/asm/uaccess_32.h:213: error: call to ‘copy_from_user_overflow’ declared with attribute error: copy_from_user buffer size is not provably correct Signed-off-by: Arjan van de Ven Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <20091002075050.4e9f7641@infradead.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d105f29..1bd2e36f 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -287,4 +287,18 @@ config OPTIMIZE_INLINING If unsure, say N. +config DEBUG_STRICT_USER_COPY_CHECKS + bool "Strict copy size checks" + depends on DEBUG_KERNEL + ---help--- + Enabling this option turns a certain set of sanity checks for user + copy operations into compile time failures. + + The copy_from_user() etc checks are there to help test if there + are sufficient security checks on the length argument of + the copy operation, by having gcc prove that the argument is + within bounds. + + If unsure, or if you run an older (pre 4.4) gcc, say N. + endmenu diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 952f9e7..0c9825e 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -193,7 +193,9 @@ unsigned long __must_check _copy_from_user(void *to, extern void copy_from_user_overflow(void) -#ifdef CONFIG_DEBUG_STACKOVERFLOW +#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS + __compiletime_error("copy_from_user() buffer size is not provably correct") +#else __compiletime_warning("copy_from_user() buffer size is not provably correct") #endif ; diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index f1709c1..77542c5 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -41,4 +41,5 @@ #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) #if __GNUC_MINOR__ >= 4 #define __compiletime_warning(message) __attribute__((warning(message))) +#define __compiletime_error(message) __attribute__((error(message))) #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 9503563..88fd4b6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -273,6 +273,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #ifndef __compiletime_warning # define __compiletime_warning(message) #endif +#ifndef __compiletime_error +# define __compiletime_error(message) +#endif /* * Prevent the compiler from merging or refetching accesses. The compiler -- cgit v0.10.2 From 98059e3463383b18fd79181179cd539b74846b47 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 1 Oct 2009 17:11:10 +0200 Subject: x86: AMD Geode LX optimizations Add CPU optimizations for AMD Geode LX. Signed-off-by: Matteo Croce LKML-Reference: <40101cc30910010811v5d15ff4cx9dd57c9cc9b4b045@mail.gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 527519b..979de29 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -406,7 +406,7 @@ config X86_CMPXCHG64 # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) config X86_MINIMUM_CPU_FAMILY int diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 30e9a26..cbf0776 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -41,7 +41,7 @@ cflags-$(CONFIG_X86_ELAN) += -march=i486 # Geode GX1 support cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx - +cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # add at the end to overwrite eventual tuning options from earlier # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) -- cgit v0.10.2 From a1a138d05fa060ac4238c19a1e890aacc25ed3ba Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Sep 2009 11:20:12 -0700 Subject: tracing/kprobes: Use global event perf buffers in kprobe tracer Use new percpu global event buffer instead of stack in kprobe tracer while tracing through perf. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Acked-by: Ingo Molnar Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090925182011.10157.60140.stgit@omoto> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 09cba270..97309d4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1149,35 +1149,49 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - int size, __size, i, pc; + struct trace_entry *ent; + int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *raw_data; - local_save_flags(irq_flags); pc = preempt_count(); - __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return 0; - do { - char raw_data[size]; - struct trace_entry *ent; - /* - * Zero dead bytes from alignment to avoid stack leak - * to userspace - */ - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - entry = (struct kprobe_trace_entry *)raw_data; - ent = &entry->ent; - - tracing_generic_entry_update(ent, irq_flags, pc); - ent->type = call->id; - entry->nargs = tp->nr_args; - entry->ip = (unsigned long)kp->addr; - for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - perf_tp_event(call->id, entry->ip, 1, entry, size); - } while (0); + /* + * Protect the non nmi buffer + * This also protects the rcu read side + */ + local_irq_save(irq_flags); + __cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; + + raw_data = per_cpu_ptr(raw_data, __cpu); + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + entry = (struct kprobe_trace_entry *)raw_data; + ent = &entry->ent; + + tracing_generic_entry_update(ent, irq_flags, pc); + ent->type = call->id; + entry->nargs = tp->nr_args; + entry->ip = (unsigned long)kp->addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + perf_tp_event(call->id, entry->ip, 1, entry, size); +end: + local_irq_restore(irq_flags); return 0; } @@ -1188,33 +1202,50 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - int size, __size, i, pc; + struct trace_entry *ent; + int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *raw_data; - local_save_flags(irq_flags); pc = preempt_count(); - __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); size = ALIGN(__size + sizeof(u32), sizeof(u64)); size -= sizeof(u32); + if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, + "profile buffer not large enough")) + return 0; + + /* + * Protect the non nmi buffer + * This also protects the rcu read side + */ + local_irq_save(irq_flags); + __cpu = smp_processor_id(); + + if (in_nmi()) + raw_data = rcu_dereference(trace_profile_buf_nmi); + else + raw_data = rcu_dereference(trace_profile_buf); + + if (!raw_data) + goto end; + + raw_data = per_cpu_ptr(raw_data, __cpu); + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ + *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; + entry = (struct kretprobe_trace_entry *)raw_data; + ent = &entry->ent; - do { - char raw_data[size]; - struct trace_entry *ent; - - *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; - entry = (struct kretprobe_trace_entry *)raw_data; - ent = &entry->ent; - - tracing_generic_entry_update(ent, irq_flags, pc); - ent->type = call->id; - entry->nargs = tp->nr_args; - entry->func = (unsigned long)tp->rp.kp.addr; - entry->ret_ip = (unsigned long)ri->ret_addr; - for (i = 0; i < tp->nr_args; i++) - entry->args[i] = call_fetch(&tp->args[i].fetch, regs); - perf_tp_event(call->id, entry->ret_ip, 1, entry, size); - } while (0); + tracing_generic_entry_update(ent, irq_flags, pc); + ent->type = call->id; + entry->nargs = tp->nr_args; + entry->func = (unsigned long)tp->rp.kp.addr; + entry->ret_ip = (unsigned long)ri->ret_addr; + for (i = 0; i < tp->nr_args; i++) + entry->args[i] = call_fetch(&tp->args[i].fetch, regs); + perf_tp_event(call->id, entry->ret_ip, 1, entry, size); +end: + local_irq_restore(irq_flags); return 0; } -- cgit v0.10.2 From c0b11d3af164947c71e2491912c5b8418900dafb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Sep 2009 11:20:38 -0700 Subject: x86: Add VIA processor instructions in opcodes decoder Add VIA processor's Padlock instructions(MONTMUL, XSHA1, XSHA256) as parts of the kernel may use them. This fixes the following crash in opcodes decoder selftests: make[2]: `scripts/unifdef' is up to date. TEST posttest Error: c145cf71: f3 0f a6 d0 repz xsha256 Error: objdump says 4 bytes, but insn_get_length() says 3 (attr:0) make[1]: *** [posttest] Error 2 make: *** [bzImage] Error 2 Reported-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Acked-by: Ingo Molnar Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090925182037.10157.3180.stgit@omoto> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 59e20d5..78a0daf 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -469,7 +469,7 @@ a2: CPUID a3: BT Ev,Gv a4: SHLD Ev,Gv,Ib a5: SHLD Ev,Gv,CL -a6: +a6: GrpPDLK a7: GrpRNG a8: PUSH GS (d64) a9: POP GS (d64) @@ -803,6 +803,12 @@ GrpTable: Grp16 3: prefetch T2 EndTable +GrpTable: GrpPDLK +0: MONTMUL +1: XSHA1 +2: XSHA2 +EndTable + GrpTable: GrpRNG 0: xstore-rng 1: xcrypt-ecb -- cgit v0.10.2 From 88f70d7590538e427c8405a2e02ac2624847386c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 25 Sep 2009 11:20:54 -0700 Subject: tracing/ftrace: Fix to check create_event_dir() when adding new events Check result of event_create_dir() and add ftrace_event_call to ftrace_events list only if it is succeeded. Thanks to Li for pointing it out. Signed-off-by: Masami Hiramatsu Acked-by: Steven Rostedt Acked-by: Ingo Molnar Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Andi Kleen Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: H. Peter Anvin Cc: Jason Baron Cc: K.Prasad Cc: Lai Jiangshan Cc: Li Zefan Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Tom Zanussi LKML-Reference: <20090925182054.10157.55219.stgit@omoto> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index a4b7c9a..155b5d5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -957,12 +957,12 @@ static int __trace_add_event_call(struct ftrace_event_call *call) if (!d_events) return -ENOENT; - list_add(&call->list, &ftrace_events); ret = event_create_dir(call, d_events, &ftrace_event_id_fops, &ftrace_enable_fops, &ftrace_event_filter_fops, &ftrace_event_format_fops); - if (ret < 0) - list_del(&call->list); + if (!ret) + list_add(&call->list, &ftrace_events); + return ret; } @@ -1124,10 +1124,11 @@ static void trace_module_add_events(struct module *mod) return; } call->mod = mod; - list_add(&call->list, &ftrace_events); - event_create_dir(call, d_events, - &file_ops->id, &file_ops->enable, - &file_ops->filter, &file_ops->format); + ret = event_create_dir(call, d_events, + &file_ops->id, &file_ops->enable, + &file_ops->filter, &file_ops->format); + if (!ret) + list_add(&call->list, &ftrace_events); } } @@ -1267,10 +1268,12 @@ static __init int event_trace_init(void) continue; } } - list_add(&call->list, &ftrace_events); - event_create_dir(call, d_events, &ftrace_event_id_fops, - &ftrace_enable_fops, &ftrace_event_filter_fops, - &ftrace_event_format_fops); + ret = event_create_dir(call, d_events, &ftrace_event_id_fops, + &ftrace_enable_fops, + &ftrace_event_filter_fops, + &ftrace_event_format_fops); + if (!ret) + list_add(&call->list, &ftrace_events); } while (true) { -- cgit v0.10.2 From 9735abf11bec48bfbbb1b54772a02deb2ae0c403 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 3 Oct 2009 10:42:45 -0300 Subject: perf tools: Move hist_entry__add common code to hist.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now perf report and annotate do the callgraph/hit processing in their specialized hist_entry__add functions. Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7d5a3b1..8550942 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -80,48 +80,16 @@ static void hist_hit(struct hist_entry *he, u64 ip) sym->hist[offset]); } -static int -hist_entry__add(struct thread *thread, struct map *map, - struct symbol *sym, u64 ip, char level) +static int hist_entry__add(struct thread *thread, struct map *map, + struct symbol *sym, u64 ip, u64 count, char level) { - struct rb_node **p = &hist.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *he; - struct hist_entry entry = { - .thread = thread, - .map = map, - .sym = sym, - .ip = ip, - .level = level, - .count = 1, - }; - int cmp; - - while (*p != NULL) { - parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__cmp(&entry, he); - - if (!cmp) { - hist_hit(he, ip); - - return 0; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - he = malloc(sizeof(*he)); - if (!he) + bool hit; + struct hist_entry *he = __hist_entry__add(thread, map, sym, NULL, ip, + count, level, &hit); + if (he == NULL) return -ENOMEM; - *he = entry; - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &hist); - + if (hit) + hist_hit(he, ip); return 0; } @@ -191,7 +159,7 @@ got_map: } if (show & show_mask) { - if (hist_entry__add(thread, map, sym, ip, level)) { + if (hist_entry__add(thread, map, sym, ip, 1, level)) { fprintf(stderr, "problem incrementing symbol count, skipping event\n"); return -1; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3ed3baf..0e83ffc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -407,9 +407,9 @@ static int call__match(struct symbol *sym) return 0; } -static struct symbol ** -resolve_callchain(struct thread *thread, struct map *map, - struct ip_callchain *chain, struct hist_entry *entry) +static struct symbol **resolve_callchain(struct thread *thread, struct map *map, + struct ip_callchain *chain, + struct symbol **parent) { u64 context = PERF_CONTEXT_MAX; struct symbol **syms = NULL; @@ -444,9 +444,8 @@ resolve_callchain(struct thread *thread, struct map *map, } if (sym) { - if (sort__has_parent && call__match(sym) && - !entry->parent) - entry->parent = sym; + if (sort__has_parent && !*parent && call__match(sym)) + *parent = sym; if (!callchain) break; syms[i] = sym; @@ -465,57 +464,27 @@ hist_entry__add(struct thread *thread, struct map *map, struct symbol *sym, u64 ip, struct ip_callchain *chain, char level, u64 count) { - struct rb_node **p = &hist.rb_node; - struct rb_node *parent = NULL; + struct symbol **syms = NULL, *parent = NULL; + bool hit; struct hist_entry *he; - struct symbol **syms = NULL; - struct hist_entry entry = { - .thread = thread, - .map = map, - .sym = sym, - .ip = ip, - .level = level, - .count = count, - .parent = NULL, - .sorted_chain = RB_ROOT - }; - int cmp; if ((sort__has_parent || callchain) && chain) - syms = resolve_callchain(thread, map, chain, &entry); - - while (*p != NULL) { - parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__cmp(&entry, he); + syms = resolve_callchain(thread, map, chain, &parent); - if (!cmp) { - he->count += count; - if (callchain) { - append_chain(&he->callchain, chain, syms); - free(syms); - } - return 0; - } + he = __hist_entry__add(thread, map, sym, parent, + ip, count, level, &hit); + if (he == NULL) + return -ENOMEM; - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } + if (hit) + he->count += count; - he = malloc(sizeof(*he)); - if (!he) - return -ENOMEM; - *he = entry; if (callchain) { - callchain_init(&he->callchain); + if (!hit) + callchain_init(&he->callchain); append_chain(&he->callchain, chain, syms); free(syms); } - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &hist); return 0; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 82808dc..7393a02 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -21,6 +21,52 @@ unsigned long total_lost; * histogram, sorted on item, collects counts */ +struct hist_entry *__hist_entry__add(struct thread *thread, struct map *map, + struct symbol *sym, + struct symbol *sym_parent, + u64 ip, u64 count, char level, bool *hit) +{ + struct rb_node **p = &hist.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *he; + struct hist_entry entry = { + .thread = thread, + .map = map, + .sym = sym, + .ip = ip, + .level = level, + .count = count, + .parent = sym_parent, + }; + int cmp; + + while (*p != NULL) { + parent = *p; + he = rb_entry(parent, struct hist_entry, rb_node); + + cmp = hist_entry__cmp(&entry, he); + + if (!cmp) { + *hit = true; + return he; + } + + if (cmp < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + he = malloc(sizeof(*he)); + if (!he) + return NULL; + *he = entry; + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &hist); + *hit = false; + return he; +} + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 9a8daa1..ac2149c 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -36,6 +36,9 @@ extern unsigned long total_fork; extern unsigned long total_unknown; extern unsigned long total_lost; +struct hist_entry *__hist_entry__add(struct thread *thread, struct map *map, + struct symbol *sym, struct symbol *parent, + u64 ip, u64 count, char level, bool *hit); extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); extern void hist_entry__free(struct hist_entry *); -- cgit v0.10.2 From ec218fc4a796a1b584741d59ef22615d96981188 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 3 Oct 2009 20:30:48 -0300 Subject: perf tools: Remove show_mask bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it was not being exposed via any command line and with --dsos/--comms we can do this and even more, like asking for just kernel + some module: [root@doppio linux-2.6-tip]# perf report --dsos \[kernel\],\[drm\] --vmlinux /home/acme/git/build/tip-recvmmsg/vmlinux --modules | head -15 # Samples: 619669 # # Overhead Command Shared Object Symbol # ........ ............... ............. ...... # 7.12% swapper [kernel] [k] read_hpet 6.86% init [kernel] [k] read_hpet 6.22% init [kernel] [k] mwait_idle_with_hints 5.34% swapper [kernel] [k] mwait_idle_with_hints 3.01% firefox [kernel] [.] vread_hpet 2.14% Xorg [drm] [k] drm_clflush_pages 2.09% pidgin [kernel] [.] vread_hpet 1.58% npviewer.bin [kernel] [.] vread_hpet 1.37% swapper [kernel] [k] hpet_next_event 1.23% Xorg [kernel] [k] read_hpet [root@doppio linux-2.6-tip]# Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091003233048.GA30535@ghostprotocols.net> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8e7509f..2c309a5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -323,6 +323,7 @@ LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h LIB_H += util/include/linux/list.h LIB_H += perf.h +LIB_H += util/event.h LIB_H += util/types.h LIB_H += util/levenshtein.h LIB_H += util/parse-options.h diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 8550942..35ed97b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -29,7 +29,6 @@ static char const *input_name = "perf.data"; static int force; static int input; -static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int full_paths; @@ -97,7 +96,6 @@ static int process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; - int show = 0; struct thread *thread; u64 ip = event->ip.ip; struct map *map = NULL; @@ -121,13 +119,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } if (event->header.misc & PERF_RECORD_MISC_KERNEL) { - show = SHOW_KERNEL; level = 'k'; sym = kernel_maps__find_symbol(ip, &map); dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); } else if (event->header.misc & PERF_RECORD_MISC_USER) { - show = SHOW_USER; level = '.'; map = thread__find_map(thread, ip); if (map != NULL) { @@ -153,17 +149,14 @@ got_map: dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); } else { - show = SHOW_HV; level = 'H'; dump_printf(" ...... dso: [hypervisor]\n"); } - if (show & show_mask) { - if (hist_entry__add(thread, map, sym, ip, 1, level)) { - fprintf(stderr, - "problem incrementing symbol count, skipping event\n"); - return -1; - } + if (hist_entry__add(thread, map, sym, ip, 1, level)) { + fprintf(stderr, "problem incrementing symbol count, " + "skipping event\n"); + return -1; } total++; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0e83ffc..fe4aadc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -38,7 +38,6 @@ static struct strlist *dso_list, *comm_list, *sym_list; static int force; static int input; -static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int full_paths; static int show_nr_samples; @@ -600,7 +599,6 @@ static int process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; - int show = 0; struct symbol *sym = NULL; struct thread *thread; u64 ip = event->ip.ip; @@ -657,42 +655,35 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; if (cpumode == PERF_RECORD_MISC_KERNEL) { - show = SHOW_KERNEL; level = 'k'; - sym = kernel_maps__find_symbol(ip, &map); dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); } else if (cpumode == PERF_RECORD_MISC_USER) { - - show = SHOW_USER; level = '.'; sym = resolve_symbol(thread, &map, &ip); } else { - show = SHOW_HV; level = 'H'; - dump_printf(" ...... dso: [hypervisor]\n"); } - if (show & show_mask) { - if (dso_list && - (!map || !map->dso || - !(strlist__has_entry(dso_list, map->dso->short_name) || - (map->dso->short_name != map->dso->long_name && - strlist__has_entry(dso_list, map->dso->long_name))))) - return 0; + if (dso_list && + (!map || !map->dso || + !(strlist__has_entry(dso_list, map->dso->short_name) || + (map->dso->short_name != map->dso->long_name && + strlist__has_entry(dso_list, map->dso->long_name))))) + return 0; - if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) - return 0; + if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) + return 0; - if (hist_entry__add(thread, map, sym, ip, - chain, level, period)) { - eprintf("problem incrementing symbol count, skipping event\n"); - return -1; - } + if (hist_entry__add(thread, map, sym, ip, + chain, level, period)) { + eprintf("problem incrementing symbol count, skipping event\n"); + return -1; } + total += period; return 0; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index a39520e..c2e62be 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -6,12 +6,6 @@ #include #include -enum { - SHOW_KERNEL = 1, - SHOW_USER = 2, - SHOW_HV = 4, -}; - /* * PERF_SAMPLE_IP | PERF_SAMPLE_TID | * */ -- cgit v0.10.2 From 5c2068059a0e852f72b7c2608d92170b752d821f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 5 Oct 2009 14:26:15 -0300 Subject: perf top: Keep the default of asking for kernel module symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index befef84..34d48c1 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -804,10 +804,8 @@ static int symbol_filter(struct map *map, struct symbol *sym) static int parse_symbols(void) { - int use_modules = vmlinux_name ? 1 : 0; - if (dsos__load_kernel(vmlinux_name, sizeof(struct sym_entry), - symbol_filter, verbose, use_modules) <= 0) + symbol_filter, verbose, 1) <= 0) return -1; if (dump_symtab) -- cgit v0.10.2 From af427bf529c5991be8d1a36f43e2d0141f532f63 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 5 Oct 2009 14:26:17 -0300 Subject: perf tools: Create maps for modules when processing kallsyms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that we get kallsyms processing closer to vmlinux + modules symtabs processing. One change in behaviour is that since when one specifies --vmlinux -m should be used to ask for modules, so it is now for kallsyms as well. Also continue if one manages to load the vmlinux data but module processing fails, so that at least some analisys can be done with part of the needed symbols. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e882968..4dfdefd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -27,6 +27,44 @@ enum dso_origin { static void dsos__add(struct dso *dso); static struct dso *dsos__find(const char *name); +static struct rb_root kernel_maps; + +static void dso__set_symbols_end(struct dso *self) +{ + struct rb_node *nd, *prevnd = rb_first(&self->syms); + + if (prevnd == NULL) + return; + + for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { + struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node), + *curr = rb_entry(nd, struct symbol, rb_node); + + if (prev->end == prev->start) + prev->end = curr->start - 1; + prevnd = nd; + } +} + +static void kernel_maps__fixup_sym_end(void) +{ + struct map *prev, *curr; + struct rb_node *nd, *prevnd = rb_first(&kernel_maps); + + if (prevnd == NULL) + return; + + curr = rb_entry(prevnd, struct map, rb_node); + dso__set_symbols_end(curr->dso); + + for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { + prev = curr; + curr = rb_entry(nd, struct map, rb_node); + prev->end = curr->start - 1; + dso__set_symbols_end(curr->dso); + } +} + static struct symbol *symbol__new(u64 start, u64 len, const char *name, unsigned int priv_size, int v) { @@ -162,10 +200,9 @@ size_t dso__fprintf(struct dso *self, FILE *fp) return ret; } -static int dso__load_kallsyms(struct dso *self, struct map *map, - symbol_filter_t filter, int v) +static int maps__load_kallsyms(symbol_filter_t filter, int use_modules, int v) { - struct rb_node *nd, *prevnd; + struct map *map = kernel_map; char *line = NULL; size_t n; FILE *file = fopen("/proc/kallsyms", "r"); @@ -179,6 +216,7 @@ static int dso__load_kallsyms(struct dso *self, struct map *map, struct symbol *sym; int line_len, len; char symbol_type; + char *module, *symbol_name; line_len = getline(&line, &n, file); if (line_len < 0) @@ -201,40 +239,50 @@ static int dso__load_kallsyms(struct dso *self, struct map *map, */ if (symbol_type != 'T' && symbol_type != 'W') continue; + + symbol_name = line + len + 2; + module = strchr(symbol_name, '\t'); + if (module) { + char *module_name_end; + + if (!use_modules) + continue; + *module = '\0'; + module = strchr(module + 1, '['); + if (!module) + continue; + module_name_end = strchr(module + 1, ']'); + if (!module_name_end) + continue; + *(module_name_end + 1) = '\0'; + if (strcmp(map->dso->name, module)) { + map = kernel_maps__find_by_dso_name(module); + if (!map) { + fputs("/proc/{kallsyms,modules} " + "inconsistency!\n", stderr); + return -1; + } + } + start = map->map_ip(map, start); + } else + map = kernel_map; /* * Well fix up the end later, when we have all sorted. */ - sym = symbol__new(start, 0xdead, line + len + 2, - self->sym_priv_size, v); + sym = symbol__new(start, 0, symbol_name, + map->dso->sym_priv_size, v); if (sym == NULL) goto out_delete_line; if (filter && filter(map, sym)) - symbol__delete(sym, self->sym_priv_size); + symbol__delete(sym, map->dso->sym_priv_size); else { - dso__insert_symbol(self, sym); + dso__insert_symbol(map->dso, sym); count++; } } - /* - * Now that we have all sorted out, just set the ->end of all - * symbols - */ - prevnd = rb_first(&self->syms); - - if (prevnd == NULL) - goto out_delete_line; - - for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { - struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node), - *curr = rb_entry(nd, struct symbol, rb_node); - - prev->end = curr->start - 1; - prevnd = nd; - } - free(line); fclose(file); @@ -246,6 +294,24 @@ out_failure: return -1; } +static size_t kernel_maps__fprintf(FILE *fp) +{ + size_t printed = fprintf(stderr, "Kernel maps:\n"); + struct rb_node *nd; + + printed += map__fprintf(kernel_map, fp); + printed += dso__fprintf(kernel_map->dso, fp); + + for (nd = rb_first(&kernel_maps); nd; nd = rb_next(nd)) { + struct map *pos = rb_entry(nd, struct map, rb_node); + + printed += map__fprintf(pos, fp); + printed += dso__fprintf(pos->dso, fp); + } + + return printed + fprintf(stderr, "END kernel maps\n"); +} + static int dso__load_perf_map(struct dso *self, struct map *map, symbol_filter_t filter, int v) { @@ -598,6 +664,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, char *demangled; int is_label = elf_sym__is_label(&sym); const char *section_name; + u64 sh_offset = 0; if (!is_label && !elf_sym__is_function(&sym)) continue; @@ -613,14 +680,18 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, section_name = elf_sec__name(&shdr, secstrs); + if ((kernel || kmodule)) { + if (strstr(section_name, ".init")) + sh_offset = shdr.sh_offset; + } + if (self->adjust_symbols) { if (v >= 2) printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; - } else if (kmodule) - sym.st_value += shdr.sh_offset; + } /* * We need to figure out if the object was created from C++ sources * DWARF DW_compile_unit has this, but we don't always have access @@ -631,7 +702,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, if (demangled != NULL) elf_name = demangled; - f = symbol__new(sym.st_value, sym.st_size, elf_name, + f = symbol__new(sym.st_value + sh_offset, sym.st_size, elf_name, self->sym_priv_size, v); free(demangled); if (!f) @@ -804,7 +875,6 @@ out: return ret; } -static struct rb_root kernel_maps; struct map *kernel_map; static void kernel_maps__insert(struct map *map) @@ -975,8 +1045,7 @@ static struct map *map__new2(u64 start, struct dso *dso) return self; } -int dsos__load_modules(unsigned int sym_priv_size, - symbol_filter_t filter, int v) +static int dsos__load_modules(unsigned int sym_priv_size) { char *line = NULL; size_t n; @@ -1034,8 +1103,7 @@ int dsos__load_modules(unsigned int sym_priv_size, free(line); fclose(file); - v = 1; - return dsos__load_modules_sym(filter, v); + return 0; out_delete_line: free(line); @@ -1075,25 +1143,37 @@ int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, kernel_map->map_ip = vdso__map_ip; + if (use_modules && dsos__load_modules(sym_priv_size) < 0) { + fprintf(stderr, "Failed to load list of modules in use! " + "Continuing...\n"); + use_modules = 0; + } + if (vmlinux) { err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter, v); if (err > 0 && use_modules) { - int syms = dsos__load_modules(sym_priv_size, filter, v); + int syms = dsos__load_modules_sym(filter, v); - if (syms < 0) { - fprintf(stderr, "dsos__load_modules failed!\n"); - return syms; - } - err += syms; + if (syms < 0) + fprintf(stderr, "Failed to read module symbols!" + " Continuing...\n"); + else + err += syms; } } if (err <= 0) - err = dso__load_kallsyms(dso, kernel_map, filter, v); + err = maps__load_kallsyms(filter, use_modules, v); if (err > 0) { struct rb_node *node = rb_first(&dso->syms); struct symbol *sym = rb_entry(node, struct symbol, rb_node); + /* + * Now that we have all sorted out, just set the ->end of all + * symbols that still don't have it. + */ + dso__set_symbols_end(dso); + kernel_maps__fixup_sym_end(); kernel_map->start = sym->start; node = rb_last(&dso->syms); @@ -1106,6 +1186,9 @@ int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, * kernel_maps__insert(kernel_map) */ dsos__add(dso); + + if (v > 0) + kernel_maps__fprintf(stderr); } return err; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5339fd8..2e4522e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -70,8 +70,6 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, symbol_filter_t filter, int verbose, int modules); -int dsos__load_modules(unsigned int sym_priv_size, symbol_filter_t filter, - int verbose); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter, int verbose); struct dso *dsos__findnew(const char *name); -- cgit v0.10.2 From a2a99e8e12798706ec1026e5d8fc36f7c86122ce Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 5 Oct 2009 14:26:18 -0300 Subject: perf tools: /proc/modules names don't always match its name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ cut -d' ' -f1 /proc/modules|grep _|wc -l 29 $ cut -d' ' -f1 /proc/modules|grep _|sed 's/$/.ko'/g|while read n;do find /lib/modules/`uname -r` -name $n;done|wc -l 12 For instance: $ grep ^aes_x86 /proc/modules aes_x86_64 9056 2 - Live 0xffffffffa0091000 $ l /lib/modules/2.6.31-tip/kernel/arch/x86/crypto/aes-x86_64.ko -rw-r--r-- 1 root root 136438 2009-09-22 19:05 /lib/modules/2.6.31-tip/kernel/arch/x86/crypto/aes-x86_64.ko Handle that by introducing a strxfrchar routine that replaces dashes with underscores when matching file names to loaded modules. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index c93eca9..04743d3 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,3 +1,4 @@ +#include #include "string.h" static int hex(char ch) @@ -32,3 +33,13 @@ int hex2u64(const char *ptr, u64 *long_val) return p - ptr; } + +char *strxfrchar(char *s, char from, char to) +{ + char *p = s; + + while ((p = strchr(p, from)) != NULL) + *p++ = to; + + return s; +} diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 15c8274..2c84bf6 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -4,6 +4,7 @@ #include "types.h" int hex2u64(const char *ptr, u64 *val); +char *strxfrchar(char *s, char from, char to); #define _STR(x) #x #define STR(x) _STR(x) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4dfdefd..e3eebdd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -189,7 +189,7 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip) size_t dso__fprintf(struct dso *self, FILE *fp) { - size_t ret = fprintf(fp, "dso: %s\n", self->long_name); + size_t ret = fprintf(fp, "dso: %s\n", self->short_name); struct rb_node *nd; for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { @@ -977,6 +977,7 @@ static int dsos__load_modules_sym_dir(char *dirname, snprintf(dso_name, sizeof(dso_name), "[%.*s]", (int)(dot - dent->d_name), dent->d_name); + strxfrchar(dso_name, '-', '_'); map = kernel_maps__find_by_dso_name(dso_name); if (map == NULL) continue; -- cgit v0.10.2 From c3b32fcbc7f4fd9a9b84718b991b175b0fd53f8c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 5 Oct 2009 14:26:16 -0300 Subject: perf report: Use kernel_maps__find_symbol as fallback to find vdsos, etc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In resolve_symbol, as we're moving to breaking the kernel symbols list per address ranges, i.e. kernel linking sections, so that we don't have a big kernel_map that in its range covers what is in the modules. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fe4aadc..12f8c86 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -384,11 +384,8 @@ got_map: * the "[vdso]" dso, but for now lets use the old * trick of looking in the whole kernel symbol list. */ - if ((long long)ip < 0) { - map = kernel_map; - if (mapp) - *mapp = map; - } + if ((long long)ip < 0) + return kernel_maps__find_symbol(ip, mapp); } dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); -- cgit v0.10.2 From cf82ff7ea7695b0e82ba07bc5e9f1bd03a74e1aa Mon Sep 17 00:00:00 2001 From: "Jayson R. King" Date: Mon, 5 Oct 2009 05:21:26 -0500 Subject: sched: Remove obsolete comment in sched_init() Remove the comment about calling alloc_bootmem() as it is not called here since commit 36b7b6d465489c4754c4fd66fcec6086eba87896. Signed-off-by: Jayson R. King Cc: Peter Zijlstra Cc: Jiri Kosina LKML-Reference: <4AC9C8A6.6010209@jaysonking.com> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 830967e..a56446d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9322,10 +9322,6 @@ void __init sched_init(void) #ifdef CONFIG_CPUMASK_OFFSTACK alloc_size += num_possible_cpus() * cpumask_size(); #endif - /* - * As sched_init() is called before page_alloc is setup, - * we use alloc_bootmem(). - */ if (alloc_size) { ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); -- cgit v0.10.2 From 26a50744b21fff65bd754874072857bee8967f4d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Oct 2009 01:09:50 -0500 Subject: tracing/events: Add 'signed' field to format files The sign info used for filters in the kernel is also useful to applications that process the trace stream. Add it to the format files and make it available to userspace. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: rostedt@goodmis.org Cc: lizf@cn.fujitsu.com Cc: hch@infradead.org Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1254809398-8078-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d966..c9bbcab 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -120,9 +120,10 @@ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; @@ -132,19 +133,21 @@ #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ + (unsigned int)sizeof(field.item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\n", \ + "offset:%u;\tsize:%u;\tsigned:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item)); \ + (unsigned int)sizeof(field.__data_loc_##item), \ + (unsigned int)is_signed_type(type)); \ if (!ret) \ return 0; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d4ff019..e43c928 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -397,18 +397,21 @@ int ring_buffer_print_page_header(struct trace_seq *s) int ret; ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" - "offset:0;\tsize:%u;\n", - (unsigned int)sizeof(field.time_stamp)); + "offset:0;\tsize:%u;\tsigned:%u;\n", + (unsigned int)sizeof(field.time_stamp), + (unsigned int)is_signed_type(u64)); ret = trace_seq_printf(s, "\tfield: local_t commit;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), commit), - (unsigned int)sizeof(field.commit)); + (unsigned int)sizeof(field.commit), + (unsigned int)is_signed_type(long)); ret = trace_seq_printf(s, "\tfield: char data;\t" - "offset:%u;\tsize:%u;\n", + "offset:%u;\tsize:%u;\tsigned:%u;\n", (unsigned int)offsetof(typeof(field), data), - (unsigned int)BUF_PAGE_SIZE); + (unsigned int)BUF_PAGE_SIZE, + (unsigned int)is_signed_type(char)); return ret; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index d128f65..cf3cabf 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -507,7 +507,7 @@ extern char *__bad_type_size(void); #define FIELD(type, name) \ sizeof(type) != sizeof(field.name) ? __bad_type_size() : \ #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name) + sizeof(field.name), is_signed_type(type) static int trace_write_header(struct trace_seq *s) { @@ -515,17 +515,17 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\n", - FIELD(unsigned short, type), - FIELD(unsigned char, flags), - FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, lock_depth)); + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" + "\n", + FIELD(unsigned short, type), + FIELD(unsigned char, flags), + FIELD(unsigned char, preempt_count), + FIELD(int, pid), + FIELD(int, lock_depth)); } static ssize_t diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9753fcc..31da218 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -66,44 +66,47 @@ static void __used ____ftrace_check_##name(void) \ #undef __field #define __field(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), item), \ - sizeof(field.item)); \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __field_desc #define __field_desc(type, container, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item)); \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item), is_signed_type(type)); \ if (!ret) \ return 0; #undef __array_desc #define __array_desc(type, container, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\n", \ + "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ offsetof(typeof(field), container.item), \ - sizeof(field.container.item)); \ + sizeof(field.container.item), \ + is_signed_type(type)); \ if (!ret) \ return 0; #undef __dynamic_array #define __dynamic_array(type, item) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:0;\n", \ - offsetof(typeof(field), item)); \ + "offset:%zu;\tsize:0;\tsigned:%u;\n", \ + offsetof(typeof(field), item), \ + is_signed_type(type)); \ if (!ret) \ return 0; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 527e17e..d99abc4 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -103,7 +103,8 @@ extern char *__bad_type_size(void); #define SYSCALL_FIELD(type, name) \ sizeof(type) != sizeof(trace.name) ? \ __bad_type_size() : \ - #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) + #type, #name, offsetof(typeof(trace), name), \ + sizeof(trace.name), is_signed_type(type) int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { @@ -120,7 +121,8 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) if (!entry) return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); if (!ret) return 0; @@ -130,8 +132,10 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) entry->args[i]); if (!ret) return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;\n", offset, - sizeof(unsigned long)); + ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" + "\tsigned:%u;\n", offset, + sizeof(unsigned long), + is_signed_type(unsigned long)); if (!ret) return 0; offset += sizeof(unsigned long); @@ -163,8 +167,10 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) struct syscall_trace_exit trace; ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" + "\tsigned:%u;\n", SYSCALL_FIELD(int, nr), SYSCALL_FIELD(long, ret)); if (!ret) @@ -212,7 +218,7 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(long, ret), 0, + ret = trace_define_field(call, SYSCALL_FIELD(long, ret), FILTER_OTHER); return ret; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 55b41b9..be8412d 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -897,6 +897,21 @@ static int event_read_fields(struct event *event, struct format_field **fields) if (read_expected(EVENT_OP, (char *)";") < 0) goto fail_expect; + if (read_expected(EVENT_ITEM, (char *)"signed") < 0) + goto fail_expect; + + if (read_expected(EVENT_OP, (char *)":") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + if (strtoul(token, NULL, 0)) + field->flags |= FIELD_IS_SIGNED; + free_token(token); + + if (read_expected(EVENT_OP, (char *)";") < 0) + goto fail_expect; + if (read_expect_type(EVENT_NEWLINE, &token) < 0) goto fail; free_token(token); @@ -2845,6 +2860,15 @@ static void parse_header_field(char *type, free_token(token); if (read_expected(EVENT_OP, (char *)";") < 0) return; + if (read_expected(EVENT_ITEM, (char *)"signed") < 0) + return; + if (read_expected(EVENT_OP, (char *)":") < 0) + return; + if (read_expect_type(EVENT_ITEM, &token) < 0) + return; + free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + return; if (read_expect_type(EVENT_NEWLINE, &token) < 0) return; free_token(token); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 162c3e6..00b440d 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -26,6 +26,7 @@ enum { enum format_flags { FIELD_IS_ARRAY = 1, FIELD_IS_POINTER = 2, + FIELD_IS_SIGNED = 4, }; struct format_field { -- cgit v0.10.2 From 2774601811bedd04ee7e38624343ea80b4a62d7e Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Oct 2009 01:09:51 -0500 Subject: perf trace: Add subsystem string to struct event Needed to fully qualify event names for event stream processing. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: rostedt@goodmis.org Cc: lizf@cn.fujitsu.com Cc: hch@infradead.org Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1254809398-8078-3-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index be8412d..de3fc8b 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2950,7 +2950,7 @@ int parse_ftrace_file(char *buf, unsigned long size) return 0; } -int parse_event_file(char *buf, unsigned long size, char *system__unused __unused) +int parse_event_file(char *buf, unsigned long size, char *sys) { struct event *event; int ret; @@ -2977,6 +2977,8 @@ int parse_event_file(char *buf, unsigned long size, char *system__unused __unuse if (ret < 0) die("failed to read event print fmt"); + event->system = strdup(sys); + #define PRINT_ARGS 0 if (PRINT_ARGS && event->print_fmt.args) print_args(event->print_fmt.args); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 00b440d..cb92978 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -133,6 +133,7 @@ struct event { int flags; struct format format; struct print_fmt print_fmt; + char *system; }; enum { @@ -167,7 +168,7 @@ void print_funcs(void); void print_printk(void); int parse_ftrace_file(char *buf, unsigned long size); -int parse_event_file(char *buf, unsigned long size, char *system); +int parse_event_file(char *buf, unsigned long size, char *sys); void print_event(int cpu, void *data, int size, unsigned long long nsecs, char *comm); -- cgit v0.10.2 From 064739bc4b3d7f424b2f25547e6611bcf0132415 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Oct 2009 01:09:52 -0500 Subject: perf trace: Add string/dynamic cases to format_flags Needed for distinguishing string fields in event stream processing. Signed-off-by: Tom Zanussi Acked-by: Frederic Weisbecker Cc: rostedt@goodmis.org Cc: lizf@cn.fujitsu.com Cc: hch@infradead.org Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: <1254809398-8078-4-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index de3fc8b..6f851f9 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -721,6 +721,24 @@ static int event_read_id(void) return -1; } +static int field_is_string(struct format_field *field) +{ + if ((field->flags & FIELD_IS_ARRAY) && + (!strstr(field->type, "char") || !strstr(field->type, "u8") || + !strstr(field->type, "s8"))) + return 1; + + return 0; +} + +static int field_is_dynamic(struct format_field *field) +{ + if (!strcmp(field->type, "__data_loc")) + return 1; + + return 0; +} + static int event_read_fields(struct event *event, struct format_field **fields) { struct format_field *field = NULL; @@ -865,6 +883,12 @@ static int event_read_fields(struct event *event, struct format_field **fields) free(brackets); } + if (field_is_string(field)) { + field->flags |= FIELD_IS_STRING; + if (field_is_dynamic(field)) + field->flags |= FIELD_IS_DYNAMIC; + } + if (test_type_token(type, token, EVENT_OP, (char *)";")) goto fail; free_token(token); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index cb92978..5f59a39 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -27,6 +27,8 @@ enum format_flags { FIELD_IS_ARRAY = 1, FIELD_IS_POINTER = 2, FIELD_IS_SIGNED = 4, + FIELD_IS_STRING = 8, + FIELD_IS_DYNAMIC = 16, }; struct format_field { -- cgit v0.10.2 From 42e59d7d19dc4b49feab2a860fd9a8ca3248c833 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 6 Oct 2009 15:14:21 +0200 Subject: perf tools: Default to 1 KHz auto-sampling freq events Use auto-freq events by default in perf record and perf top. This allows more consistent hardware event sampling, regardless of the intensity of the underlying event. It also keeps us from over-sampling on larger/busier systems. (also make surrounding initializations more consistent) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3eeef33..494f8c7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -29,43 +29,43 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static long default_interval = 100000; -static int nr_cpus = 0; +static int nr_cpus = 0; static unsigned int page_size; -static unsigned int mmap_pages = 128; -static int freq = 0; +static unsigned int mmap_pages = 128; +static int freq = 1000; static int output; static const char *output_name = "perf.data"; -static int group = 0; -static unsigned int realtime_prio = 0; -static int raw_samples = 0; -static int system_wide = 0; -static int profile_cpu = -1; -static pid_t target_pid = -1; -static pid_t child_pid = -1; -static int inherit = 1; -static int force = 0; -static int append_file = 0; -static int call_graph = 0; -static int inherit_stat = 0; -static int no_samples = 0; -static int sample_address = 0; -static int multiplex = 0; -static int multiplex_fd = -1; - -static long samples; +static int group = 0; +static unsigned int realtime_prio = 0; +static int raw_samples = 0; +static int system_wide = 0; +static int profile_cpu = -1; +static pid_t target_pid = -1; +static pid_t child_pid = -1; +static int inherit = 1; +static int force = 0; +static int append_file = 0; +static int call_graph = 0; +static int inherit_stat = 0; +static int no_samples = 0; +static int sample_address = 0; +static int multiplex = 0; +static int multiplex_fd = -1; + +static long samples = 0; static struct timeval last_read; static struct timeval this_read; -static u64 bytes_written; +static u64 bytes_written = 0; static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; -static int nr_poll; -static int nr_cpu; +static int nr_poll = 0; +static int nr_cpu = 0; -static int file_new = 1; +static int file_new = 1; -struct perf_header *header; +struct perf_header *header = NULL; struct mmap_data { int counter; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c574c5b..d978dc9 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -55,26 +55,26 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static int system_wide = 0; +static int system_wide = 0; static int default_interval = 100000; -static int count_filter = 5; -static int print_entries = 15; +static int count_filter = 5; +static int print_entries = 15; -static int target_pid = -1; -static int inherit = 0; -static int profile_cpu = -1; -static int nr_cpus = 0; -static unsigned int realtime_prio = 0; -static int group = 0; +static int target_pid = -1; +static int inherit = 0; +static int profile_cpu = -1; +static int nr_cpus = 0; +static unsigned int realtime_prio = 0; +static int group = 0; static unsigned int page_size; -static unsigned int mmap_pages = 16; -static int freq = 0; +static unsigned int mmap_pages = 16; +static int freq = 1000; /* 1 KHz */ -static int delay_secs = 2; -static int zero; -static int dump_symtab; +static int delay_secs = 2; +static int zero = 0; +static int dump_symtab = 0; /* * Source @@ -87,11 +87,11 @@ struct source_line { struct source_line *next; }; -static char *sym_filter = NULL; -struct sym_entry *sym_filter_entry = NULL; -static int sym_pcnt_filter = 5; -static int sym_counter = 0; -static int display_weighted = -1; +static char *sym_filter = NULL; +struct sym_entry *sym_filter_entry = NULL; +static int sym_pcnt_filter = 5; +static int sym_counter = 0; +static int display_weighted = -1; /* * Symbols -- cgit v0.10.2 From b209aa1f83964d49a332a7b6b818ebede5cdc6ef Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 6 Oct 2009 21:21:26 +0200 Subject: perf tools: Start the perf.data mapping at data offset in perf trace Currently, we are mapping perf.data in the beginning of the file and use the data offset as a buffer offset. This may exceed the mapping area if the data offset is upper than page_size * mmap_window and result in a page fault (thing that happen if we merge trace.info in perf.data). Instead, let's start the mapping in the page that matches our data offset. v2: Drop a junk from another patch (trace_report() removal) Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Tom Zanussi LKML-Reference: <1254856886-10348-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 5d4c84d..d573d4e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -143,6 +143,7 @@ static int __cmd_trace(void) int ret, rc = EXIT_FAILURE; unsigned long offset = 0; unsigned long head = 0; + unsigned long shift; struct stat perf_stat; event_t *event; uint32_t size; @@ -180,6 +181,10 @@ static int __cmd_trace(void) return EXIT_FAILURE; } + shift = page_size * (head / page_size); + offset += shift; + head -= shift; + remap: buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, MAP_SHARED, input, offset); @@ -192,9 +197,9 @@ more: event = (event_t *)(buf + head); if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); int res; + shift = page_size * (head / page_size); res = munmap(buf, page_size * mmap_window); assert(res == 0); -- cgit v0.10.2 From 03456a158d9067d2f657bec170506009db81756d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 6 Oct 2009 23:36:47 +0200 Subject: perf tools: Merge trace.info content into perf.data This drops the trace.info file and move its contents into the common perf.data file. This is done by creating a new trace_info section into this file. A user of perf headers needs to call perf_header__set_trace_info() to save the trace meta informations into the perf.data file. A file created by perf after his patch is unsupported by previous version because the size of the headers have increased. That said, it's two new fields that have been added in the end of the headers, and those could be ignored by previous versions if they just handled the dynamic header size and then ignore the unknow part. The offsets guarantee the compatibility. We'll do a -stable fix for that. But current previous versions handle the header size using its static size, not dynamic, then it's not backward compatible with trace records. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <20091006213643.GA5343@nowhere> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 494f8c7..59af03d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -17,7 +17,6 @@ #include "util/header.h" #include "util/event.h" #include "util/debug.h" -#include "util/trace-event.h" #include #include @@ -566,17 +565,17 @@ static int __cmd_record(int argc, const char **argv) else header = perf_header__new(); - if (raw_samples) { - read_tracing_data(attrs, nr_counters); + perf_header__set_trace_info(); } else { for (i = 0; i < nr_counters; i++) { if (attrs[i].sample_type & PERF_SAMPLE_RAW) { - read_tracing_data(attrs, nr_counters); + perf_header__set_trace_info(); break; } } } + atexit(atexit_header); if (!system_wide) { diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 4470f25..1887138 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1634,7 +1634,6 @@ static int read_events(void) uint32_t size; char *buf; - trace_report(); register_idle_thread(&threads, &last_match); input = open(input_name, O_RDONLY); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d573d4e..d9abb4a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -149,7 +149,6 @@ static int __cmd_trace(void) uint32_t size; char *buf; - trace_report(); register_idle_thread(&threads, &last_match); input = open(input_name, O_RDONLY); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e306857..212fade 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -5,6 +5,8 @@ #include "util.h" #include "header.h" +#include "../perf.h" +#include "trace-event.h" /* * Create new perf.data header attribute: @@ -62,6 +64,8 @@ struct perf_header *perf_header__new(void) self->data_offset = 0; self->data_size = 0; + self->trace_info_offset = 0; + self->trace_info_size = 0; return self; } @@ -145,8 +149,16 @@ struct perf_file_header { struct perf_file_section attrs; struct perf_file_section data; struct perf_file_section event_types; + struct perf_file_section trace_info; }; +static int trace_info; + +void perf_header__set_trace_info(void) +{ + trace_info = 1; +} + static void do_write(int fd, void *buf, size_t size) { while (size) { @@ -198,6 +210,23 @@ void perf_header__write(struct perf_header *self, int fd) if (events) do_write(fd, events, self->event_size); + if (trace_info) { + static int trace_info_written; + + /* + * Write it only once + */ + if (!trace_info_written) { + self->trace_info_offset = lseek(fd, 0, SEEK_CUR); + read_tracing_data(fd, attrs, nr_counters); + self->trace_info_size = lseek(fd, 0, SEEK_CUR) - + self->trace_info_offset; + trace_info_written = 1; + } else { + lseek(fd, self->trace_info_offset + + self->trace_info_size, SEEK_SET); + } + } self->data_offset = lseek(fd, 0, SEEK_CUR); @@ -217,6 +246,10 @@ void perf_header__write(struct perf_header *self, int fd) .offset = self->event_offset, .size = self->event_size, }, + .trace_info = { + .offset = self->trace_info_offset, + .size = self->trace_info_size, + }, }; lseek(fd, 0, SEEK_SET); @@ -290,6 +323,15 @@ struct perf_header *perf_header__read(int fd) do_read(fd, events, f_header.event_types.size); event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } + + self->trace_info_offset = f_header.trace_info.offset; + self->trace_info_size = f_header.trace_info.size; + + if (self->trace_info_size) { + lseek(fd, self->trace_info_offset, SEEK_SET); + trace_report(fd); + } + self->event_offset = f_header.event_types.offset; self->event_size = f_header.event_types.size; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index a2916b6..30aee51 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -21,6 +21,8 @@ struct perf_header { u64 data_size; u64 event_offset; u64 event_size; + u64 trace_info_offset; + u64 trace_info_size; }; struct perf_header *perf_header__read(int fd); @@ -40,7 +42,7 @@ void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header); - +void perf_header__set_trace_info(void); struct perf_header *perf_header__new(void); diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index af4b057..831052d 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -496,14 +496,12 @@ get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) return path.next; } -void read_tracing_data(struct perf_event_attr *pattrs, int nb_events) +void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) { char buf[BUFSIZ]; struct tracepoint_path *tps; - output_fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); - if (output_fd < 0) - die("creating file '%s'", output_file); + output_fd = fd; buf[0] = 23; buf[1] = 8; diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1b5c847..44292e0 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -458,9 +458,8 @@ struct record *trace_read_data(int cpu) return data; } -void trace_report(void) +void trace_report(int fd) { - const char *input_file = "trace.info"; char buf[BUFSIZ]; char test[] = { 23, 8, 68 }; char *version; @@ -468,9 +467,7 @@ void trace_report(void) int show_funcs = 0; int show_printk = 0; - input_fd = open(input_file, O_RDONLY); - if (input_fd < 0) - die("opening '%s'\n", input_file); + input_fd = fd; read_or_die(buf, 3); if (memcmp(buf, test, 3) != 0) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 5f59a39..da77e07 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -158,7 +158,7 @@ struct record *trace_read_data(int cpu); void parse_set_info(int nr_cpus, int long_sz); -void trace_report(void); +void trace_report(int fd); void *malloc_or_die(unsigned int size); @@ -244,6 +244,6 @@ unsigned long long raw_field_value(struct event *event, const char *name, void *data); void *raw_field_ptr(struct event *event, const char *name, void *data); -void read_tracing_data(struct perf_event_attr *pattrs, int nb_events); +void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); #endif /* __PERF_TRACE_EVENTS_H */ -- cgit v0.10.2 From c6d3aaa4e35c71a32a86ececacd4eea7ecfc316c Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 30 Sep 2009 13:37:50 -0400 Subject: selinux: dynamic class/perm discovery Modify SELinux to dynamically discover class and permission values upon policy load, based on the dynamic object class/perm discovery logic from libselinux. A mapping is created between kernel-private class and permission indices used outside the security server and the policy values used within the security server. The mappings are only applied upon kernel-internal computations; similar mappings for the private indices of userspace object managers is handled on a per-object manager basis by the userspace AVC. The interfaces for compute_av and transition_sid are split for kernel vs. userspace; the userspace functions are distinguished by a _user suffix. The kernel-private class indices are no longer tied to the policy values and thus do not need to skip indices for userspace classes; thus the kernel class index values are compressed. The flask.h definitions were regenerated by deleting the userspace classes from refpolicy's definitions and then regenerating the headers. Going forward, we can just maintain the flask.h, av_permissions.h, and classmap.h definitions separately from policy as they are no longer tied to the policy values. The next patch introduces a utility to automate generation of flask.h and av_permissions.h from the classmap.h definitions. The older kernel class and permission string tables are removed and replaced by a single security class mapping table that is walked at policy load to generate the mapping. The old kernel class validation logic is completely replaced by the mapping logic. The handle unknown logic is reworked. reject_unknown=1 is handled when the mappings are computed at policy load time, similar to the old handling by the class validation logic. allow_unknown=1 is handled when computing and mapping decisions - if the permission was not able to be mapped (i.e. undefined, mapped to zero), then it is automatically added to the allowed vector. If the class was not able to be mapped (i.e. undefined, mapped to zero), then all permissions are allowed for it if allow_unknown=1. avc_audit leverages the new security class mapping table to lookup the class and permission names from the kernel-private indices. The mdp program is updated to use the new table when generating the class definitions and allow rules for a minimal boot policy for the kernel. It should be noted that this policy will not include any userspace classes, nor will its policy index values for the kernel classes correspond with the ones in refpolicy (they will instead match the kernel-private indices). Signed-off-by: Stephen Smalley Signed-off-by: James Morris diff --git a/scripts/selinux/mdp/mdp.c b/scripts/selinux/mdp/mdp.c index b4ced85..62b34ce 100644 --- a/scripts/selinux/mdp/mdp.c +++ b/scripts/selinux/mdp/mdp.c @@ -29,86 +29,27 @@ #include #include -#include "flask.h" - static void usage(char *name) { printf("usage: %s [-m] policy_file context_file\n", name); exit(1); } -static void find_common_name(char *cname, char *dest, int len) -{ - char *start, *end; - - start = strchr(cname, '_')+1; - end = strchr(start, '_'); - if (!start || !end || start-cname > len || end-start > len) { - printf("Error with commons defines\n"); - exit(1); - } - strncpy(dest, start, end-start); - dest[end-start] = '\0'; -} - -#define S_(x) x, -static char *classlist[] = { -#include "class_to_string.h" - NULL +/* Class/perm mapping support */ +struct security_class_mapping { + const char *name; + const char *perms[sizeof(unsigned) * 8 + 1]; }; -#undef S_ +#include "classmap.h" #include "initial_sid_to_string.h" -#define TB_(x) char *x[] = { -#define TE_(x) NULL }; -#define S_(x) x, -#include "common_perm_to_string.h" -#undef TB_ -#undef TE_ -#undef S_ - -struct common { - char *cname; - char **perms; -}; -struct common common[] = { -#define TB_(x) { #x, x }, -#define S_(x) -#define TE_(x) -#include "common_perm_to_string.h" -#undef TB_ -#undef TE_ -#undef S_ -}; - -#define S_(x, y, z) {x, #y}, -struct av_inherit { - int class; - char *common; -}; -struct av_inherit av_inherit[] = { -#include "av_inherit.h" -}; -#undef S_ - -#include "av_permissions.h" -#define S_(x, y, z) {x, y, z}, -struct av_perms { - int class; - int perm_i; - char *perm_s; -}; -struct av_perms av_perms[] = { -#include "av_perm_to_string.h" -}; -#undef S_ - int main(int argc, char *argv[]) { int i, j, mls = 0; + int initial_sid_to_string_len; char **arg, *polout, *ctxout; - int classlist_len, initial_sid_to_string_len; + FILE *fout; if (argc < 3) @@ -127,64 +68,25 @@ int main(int argc, char *argv[]) usage(argv[0]); } - classlist_len = sizeof(classlist) / sizeof(char *); /* print out the classes */ - for (i=1; i < classlist_len; i++) { - if(classlist[i]) - fprintf(fout, "class %s\n", classlist[i]); - else - fprintf(fout, "class user%d\n", i); - } + for (i = 0; secclass_map[i].name; i++) + fprintf(fout, "class %s\n", secclass_map[i].name); fprintf(fout, "\n"); initial_sid_to_string_len = sizeof(initial_sid_to_string) / sizeof (char *); /* print out the sids */ - for (i=1; i < initial_sid_to_string_len; i++) + for (i = 1; i < initial_sid_to_string_len; i++) fprintf(fout, "sid %s\n", initial_sid_to_string[i]); fprintf(fout, "\n"); - /* print out the commons */ - for (i=0; i< sizeof(common)/sizeof(struct common); i++) { - char cname[101]; - find_common_name(common[i].cname, cname, 100); - cname[100] = '\0'; - fprintf(fout, "common %s\n{\n", cname); - for (j=0; common[i].perms[j]; j++) - fprintf(fout, "\t%s\n", common[i].perms[j]); - fprintf(fout, "}\n\n"); - } - fprintf(fout, "\n"); - /* print out the class permissions */ - for (i=1; i < classlist_len; i++) { - if (classlist[i]) { - int firstperm = -1, numperms = 0; - - fprintf(fout, "class %s\n", classlist[i]); - /* does it inherit from a common? */ - for (j=0; j < sizeof(av_inherit)/sizeof(struct av_inherit); j++) - if (av_inherit[j].class == i) - fprintf(fout, "inherits %s\n", av_inherit[j].common); - - for (j=0; j < sizeof(av_perms)/sizeof(struct av_perms); j++) { - if (av_perms[j].class == i) { - if (firstperm == -1) - firstperm = j; - numperms++; - } - } - if (!numperms) { - fprintf(fout, "\n"); - continue; - } - - fprintf(fout, "{\n"); - /* print out the av_perms */ - for (j=0; j < numperms; j++) { - fprintf(fout, "\t%s\n", av_perms[firstperm+j].perm_s); - } - fprintf(fout, "}\n\n"); - } + for (i = 0; secclass_map[i].name; i++) { + struct security_class_mapping *map = &secclass_map[i]; + fprintf(fout, "class %s\n", map->name); + fprintf(fout, "{\n"); + for (j = 0; map->perms[j]; j++) + fprintf(fout, "\t%s\n", map->perms[j]); + fprintf(fout, "}\n\n"); } fprintf(fout, "\n"); @@ -197,31 +99,34 @@ int main(int argc, char *argv[]) /* types, roles, and allows */ fprintf(fout, "type base_t;\n"); fprintf(fout, "role base_r types { base_t };\n"); - for (i=1; i < classlist_len; i++) { - if (classlist[i]) - fprintf(fout, "allow base_t base_t:%s *;\n", classlist[i]); - else - fprintf(fout, "allow base_t base_t:user%d *;\n", i); - } + for (i = 0; secclass_map[i].name; i++) + fprintf(fout, "allow base_t base_t:%s *;\n", + secclass_map[i].name); fprintf(fout, "user user_u roles { base_r };\n"); fprintf(fout, "\n"); /* default sids */ - for (i=1; i < initial_sid_to_string_len; i++) + for (i = 1; i < initial_sid_to_string_len; i++) fprintf(fout, "sid %s user_u:base_r:base_t\n", initial_sid_to_string[i]); fprintf(fout, "\n"); - fprintf(fout, "fs_use_xattr ext2 user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr ext3 user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_xattr ext4 user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr jfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr xfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_xattr reiserfs user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_xattr jffs2 user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_xattr gfs2 user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_xattr lustre user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_task eventpollfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_task pipefs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_task sockfs user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_trans mqueue user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_trans devpts user_u:base_r:base_t;\n"); + fprintf(fout, "fs_use_trans hugetlbfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_trans tmpfs user_u:base_r:base_t;\n"); fprintf(fout, "fs_use_trans shm user_u:base_r:base_t;\n"); diff --git a/security/selinux/avc.c b/security/selinux/avc.c index b4b5da1..18f4103 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -31,43 +31,7 @@ #include #include "avc.h" #include "avc_ss.h" - -static const struct av_perm_to_string av_perm_to_string[] = { -#define S_(c, v, s) { c, v, s }, -#include "av_perm_to_string.h" -#undef S_ -}; - -static const char *class_to_string[] = { -#define S_(s) s, -#include "class_to_string.h" -#undef S_ -}; - -#define TB_(s) static const char *s[] = { -#define TE_(s) }; -#define S_(s) s, -#include "common_perm_to_string.h" -#undef TB_ -#undef TE_ -#undef S_ - -static const struct av_inherit av_inherit[] = { -#define S_(c, i, b) { .tclass = c,\ - .common_pts = common_##i##_perm_to_string,\ - .common_base = b }, -#include "av_inherit.h" -#undef S_ -}; - -const struct selinux_class_perm selinux_class_perm = { - .av_perm_to_string = av_perm_to_string, - .av_pts_len = ARRAY_SIZE(av_perm_to_string), - .class_to_string = class_to_string, - .cts_len = ARRAY_SIZE(class_to_string), - .av_inherit = av_inherit, - .av_inherit_len = ARRAY_SIZE(av_inherit) -}; +#include "classmap.h" #define AVC_CACHE_SLOTS 512 #define AVC_DEF_CACHE_THRESHOLD 512 @@ -139,52 +103,28 @@ static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass) */ static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) { - const char **common_pts = NULL; - u32 common_base = 0; - int i, i2, perm; + const char **perms; + int i, perm; if (av == 0) { audit_log_format(ab, " null"); return; } - for (i = 0; i < ARRAY_SIZE(av_inherit); i++) { - if (av_inherit[i].tclass == tclass) { - common_pts = av_inherit[i].common_pts; - common_base = av_inherit[i].common_base; - break; - } - } + perms = secclass_map[tclass-1].perms; audit_log_format(ab, " {"); i = 0; perm = 1; - while (perm < common_base) { + while (i < (sizeof(av) * 8)) { if (perm & av) { - audit_log_format(ab, " %s", common_pts[i]); + audit_log_format(ab, " %s", perms[i]); av &= ~perm; } i++; perm <<= 1; } - while (i < sizeof(av) * 8) { - if (perm & av) { - for (i2 = 0; i2 < ARRAY_SIZE(av_perm_to_string); i2++) { - if ((av_perm_to_string[i2].tclass == tclass) && - (av_perm_to_string[i2].value == perm)) - break; - } - if (i2 < ARRAY_SIZE(av_perm_to_string)) { - audit_log_format(ab, " %s", - av_perm_to_string[i2].name); - av &= ~perm; - } - } - i++; - perm <<= 1; - } - if (av) audit_log_format(ab, " 0x%x", av); @@ -219,8 +159,8 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla kfree(scontext); } - BUG_ON(tclass >= ARRAY_SIZE(class_to_string) || !class_to_string[tclass]); - audit_log_format(ab, " tclass=%s", class_to_string[tclass]); + BUG_ON(tclass >= ARRAY_SIZE(secclass_map)); + audit_log_format(ab, " tclass=%s", secclass_map[tclass-1].name); } /** diff --git a/security/selinux/include/av_inherit.h b/security/selinux/include/av_inherit.h deleted file mode 100644 index abedcd7..0000000 --- a/security/selinux/include/av_inherit.h +++ /dev/null @@ -1,34 +0,0 @@ -/* This file is automatically generated. Do not edit. */ - S_(SECCLASS_DIR, file, 0x00020000UL) - S_(SECCLASS_FILE, file, 0x00020000UL) - S_(SECCLASS_LNK_FILE, file, 0x00020000UL) - S_(SECCLASS_CHR_FILE, file, 0x00020000UL) - S_(SECCLASS_BLK_FILE, file, 0x00020000UL) - S_(SECCLASS_SOCK_FILE, file, 0x00020000UL) - S_(SECCLASS_FIFO_FILE, file, 0x00020000UL) - S_(SECCLASS_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_TCP_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_UDP_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_RAWIP_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_PACKET_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_KEY_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_UNIX_STREAM_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_UNIX_DGRAM_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_TUN_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_IPC, ipc, 0x00000200UL) - S_(SECCLASS_SEM, ipc, 0x00000200UL) - S_(SECCLASS_MSGQ, ipc, 0x00000200UL) - S_(SECCLASS_SHM, ipc, 0x00000200UL) - S_(SECCLASS_NETLINK_ROUTE_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_FIREWALL_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_TCPDIAG_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_NFLOG_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_XFRM_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_SELINUX_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_AUDIT_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_IP6FW_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_DNRT_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_APPLETALK_SOCKET, socket, 0x00400000UL) - S_(SECCLASS_DCCP_SOCKET, socket, 0x00400000UL) diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h deleted file mode 100644 index 2b683ad..0000000 --- a/security/selinux/include/av_perm_to_string.h +++ /dev/null @@ -1,183 +0,0 @@ -/* This file is automatically generated. Do not edit. */ - S_(SECCLASS_FILESYSTEM, FILESYSTEM__MOUNT, "mount") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__REMOUNT, "remount") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__UNMOUNT, "unmount") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__GETATTR, "getattr") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__RELABELFROM, "relabelfrom") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__RELABELTO, "relabelto") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__TRANSITION, "transition") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__ASSOCIATE, "associate") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__QUOTAMOD, "quotamod") - S_(SECCLASS_FILESYSTEM, FILESYSTEM__QUOTAGET, "quotaget") - S_(SECCLASS_DIR, DIR__ADD_NAME, "add_name") - S_(SECCLASS_DIR, DIR__REMOVE_NAME, "remove_name") - S_(SECCLASS_DIR, DIR__REPARENT, "reparent") - S_(SECCLASS_DIR, DIR__SEARCH, "search") - S_(SECCLASS_DIR, DIR__RMDIR, "rmdir") - S_(SECCLASS_DIR, DIR__OPEN, "open") - S_(SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, "execute_no_trans") - S_(SECCLASS_FILE, FILE__ENTRYPOINT, "entrypoint") - S_(SECCLASS_FILE, FILE__EXECMOD, "execmod") - S_(SECCLASS_FILE, FILE__OPEN, "open") - S_(SECCLASS_CHR_FILE, CHR_FILE__EXECUTE_NO_TRANS, "execute_no_trans") - S_(SECCLASS_CHR_FILE, CHR_FILE__ENTRYPOINT, "entrypoint") - S_(SECCLASS_CHR_FILE, CHR_FILE__EXECMOD, "execmod") - S_(SECCLASS_CHR_FILE, CHR_FILE__OPEN, "open") - S_(SECCLASS_BLK_FILE, BLK_FILE__OPEN, "open") - S_(SECCLASS_SOCK_FILE, SOCK_FILE__OPEN, "open") - S_(SECCLASS_FIFO_FILE, FIFO_FILE__OPEN, "open") - S_(SECCLASS_FD, FD__USE, "use") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__CONNECTTO, "connectto") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__NEWCONN, "newconn") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__ACCEPTFROM, "acceptfrom") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__NODE_BIND, "node_bind") - S_(SECCLASS_TCP_SOCKET, TCP_SOCKET__NAME_CONNECT, "name_connect") - S_(SECCLASS_UDP_SOCKET, UDP_SOCKET__NODE_BIND, "node_bind") - S_(SECCLASS_RAWIP_SOCKET, RAWIP_SOCKET__NODE_BIND, "node_bind") - S_(SECCLASS_NODE, NODE__TCP_RECV, "tcp_recv") - S_(SECCLASS_NODE, NODE__TCP_SEND, "tcp_send") - S_(SECCLASS_NODE, NODE__UDP_RECV, "udp_recv") - S_(SECCLASS_NODE, NODE__UDP_SEND, "udp_send") - S_(SECCLASS_NODE, NODE__RAWIP_RECV, "rawip_recv") - S_(SECCLASS_NODE, NODE__RAWIP_SEND, "rawip_send") - S_(SECCLASS_NODE, NODE__ENFORCE_DEST, "enforce_dest") - S_(SECCLASS_NODE, NODE__DCCP_RECV, "dccp_recv") - S_(SECCLASS_NODE, NODE__DCCP_SEND, "dccp_send") - S_(SECCLASS_NODE, NODE__RECVFROM, "recvfrom") - S_(SECCLASS_NODE, NODE__SENDTO, "sendto") - S_(SECCLASS_NETIF, NETIF__TCP_RECV, "tcp_recv") - S_(SECCLASS_NETIF, NETIF__TCP_SEND, "tcp_send") - S_(SECCLASS_NETIF, NETIF__UDP_RECV, "udp_recv") - S_(SECCLASS_NETIF, NETIF__UDP_SEND, "udp_send") - S_(SECCLASS_NETIF, NETIF__RAWIP_RECV, "rawip_recv") - S_(SECCLASS_NETIF, NETIF__RAWIP_SEND, "rawip_send") - S_(SECCLASS_NETIF, NETIF__DCCP_RECV, "dccp_recv") - S_(SECCLASS_NETIF, NETIF__DCCP_SEND, "dccp_send") - S_(SECCLASS_NETIF, NETIF__INGRESS, "ingress") - S_(SECCLASS_NETIF, NETIF__EGRESS, "egress") - S_(SECCLASS_UNIX_STREAM_SOCKET, UNIX_STREAM_SOCKET__CONNECTTO, "connectto") - S_(SECCLASS_UNIX_STREAM_SOCKET, UNIX_STREAM_SOCKET__NEWCONN, "newconn") - S_(SECCLASS_UNIX_STREAM_SOCKET, UNIX_STREAM_SOCKET__ACCEPTFROM, "acceptfrom") - S_(SECCLASS_PROCESS, PROCESS__FORK, "fork") - S_(SECCLASS_PROCESS, PROCESS__TRANSITION, "transition") - S_(SECCLASS_PROCESS, PROCESS__SIGCHLD, "sigchld") - S_(SECCLASS_PROCESS, PROCESS__SIGKILL, "sigkill") - S_(SECCLASS_PROCESS, PROCESS__SIGSTOP, "sigstop") - S_(SECCLASS_PROCESS, PROCESS__SIGNULL, "signull") - S_(SECCLASS_PROCESS, PROCESS__SIGNAL, "signal") - S_(SECCLASS_PROCESS, PROCESS__PTRACE, "ptrace") - S_(SECCLASS_PROCESS, PROCESS__GETSCHED, "getsched") - S_(SECCLASS_PROCESS, PROCESS__SETSCHED, "setsched") - S_(SECCLASS_PROCESS, PROCESS__GETSESSION, "getsession") - S_(SECCLASS_PROCESS, PROCESS__GETPGID, "getpgid") - S_(SECCLASS_PROCESS, PROCESS__SETPGID, "setpgid") - S_(SECCLASS_PROCESS, PROCESS__GETCAP, "getcap") - S_(SECCLASS_PROCESS, PROCESS__SETCAP, "setcap") - S_(SECCLASS_PROCESS, PROCESS__SHARE, "share") - S_(SECCLASS_PROCESS, PROCESS__GETATTR, "getattr") - S_(SECCLASS_PROCESS, PROCESS__SETEXEC, "setexec") - S_(SECCLASS_PROCESS, PROCESS__SETFSCREATE, "setfscreate") - S_(SECCLASS_PROCESS, PROCESS__NOATSECURE, "noatsecure") - S_(SECCLASS_PROCESS, PROCESS__SIGINH, "siginh") - S_(SECCLASS_PROCESS, PROCESS__SETRLIMIT, "setrlimit") - S_(SECCLASS_PROCESS, PROCESS__RLIMITINH, "rlimitinh") - S_(SECCLASS_PROCESS, PROCESS__DYNTRANSITION, "dyntransition") - S_(SECCLASS_PROCESS, PROCESS__SETCURRENT, "setcurrent") - S_(SECCLASS_PROCESS, PROCESS__EXECMEM, "execmem") - S_(SECCLASS_PROCESS, PROCESS__EXECSTACK, "execstack") - S_(SECCLASS_PROCESS, PROCESS__EXECHEAP, "execheap") - S_(SECCLASS_PROCESS, PROCESS__SETKEYCREATE, "setkeycreate") - S_(SECCLASS_PROCESS, PROCESS__SETSOCKCREATE, "setsockcreate") - S_(SECCLASS_MSGQ, MSGQ__ENQUEUE, "enqueue") - S_(SECCLASS_MSG, MSG__SEND, "send") - S_(SECCLASS_MSG, MSG__RECEIVE, "receive") - S_(SECCLASS_SHM, SHM__LOCK, "lock") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_AV, "compute_av") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_CREATE, "compute_create") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_MEMBER, "compute_member") - S_(SECCLASS_SECURITY, SECURITY__CHECK_CONTEXT, "check_context") - S_(SECCLASS_SECURITY, SECURITY__LOAD_POLICY, "load_policy") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_RELABEL, "compute_relabel") - S_(SECCLASS_SECURITY, SECURITY__COMPUTE_USER, "compute_user") - S_(SECCLASS_SECURITY, SECURITY__SETENFORCE, "setenforce") - S_(SECCLASS_SECURITY, SECURITY__SETBOOL, "setbool") - S_(SECCLASS_SECURITY, SECURITY__SETSECPARAM, "setsecparam") - S_(SECCLASS_SECURITY, SECURITY__SETCHECKREQPROT, "setcheckreqprot") - S_(SECCLASS_SYSTEM, SYSTEM__IPC_INFO, "ipc_info") - S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, "syslog_read") - S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, "syslog_mod") - S_(SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, "syslog_console") - S_(SECCLASS_SYSTEM, SYSTEM__MODULE_REQUEST, "module_request") - S_(SECCLASS_CAPABILITY, CAPABILITY__CHOWN, "chown") - S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_OVERRIDE, "dac_override") - S_(SECCLASS_CAPABILITY, CAPABILITY__DAC_READ_SEARCH, "dac_read_search") - S_(SECCLASS_CAPABILITY, CAPABILITY__FOWNER, "fowner") - S_(SECCLASS_CAPABILITY, CAPABILITY__FSETID, "fsetid") - S_(SECCLASS_CAPABILITY, CAPABILITY__KILL, "kill") - S_(SECCLASS_CAPABILITY, CAPABILITY__SETGID, "setgid") - S_(SECCLASS_CAPABILITY, CAPABILITY__SETUID, "setuid") - S_(SECCLASS_CAPABILITY, CAPABILITY__SETPCAP, "setpcap") - S_(SECCLASS_CAPABILITY, CAPABILITY__LINUX_IMMUTABLE, "linux_immutable") - S_(SECCLASS_CAPABILITY, CAPABILITY__NET_BIND_SERVICE, "net_bind_service") - S_(SECCLASS_CAPABILITY, CAPABILITY__NET_BROADCAST, "net_broadcast") - S_(SECCLASS_CAPABILITY, CAPABILITY__NET_ADMIN, "net_admin") - S_(SECCLASS_CAPABILITY, CAPABILITY__NET_RAW, "net_raw") - S_(SECCLASS_CAPABILITY, CAPABILITY__IPC_LOCK, "ipc_lock") - S_(SECCLASS_CAPABILITY, CAPABILITY__IPC_OWNER, "ipc_owner") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_MODULE, "sys_module") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_RAWIO, "sys_rawio") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_CHROOT, "sys_chroot") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_PTRACE, "sys_ptrace") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_PACCT, "sys_pacct") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_ADMIN, "sys_admin") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_BOOT, "sys_boot") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_NICE, "sys_nice") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_RESOURCE, "sys_resource") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_TIME, "sys_time") - S_(SECCLASS_CAPABILITY, CAPABILITY__SYS_TTY_CONFIG, "sys_tty_config") - S_(SECCLASS_CAPABILITY, CAPABILITY__MKNOD, "mknod") - S_(SECCLASS_CAPABILITY, CAPABILITY__LEASE, "lease") - S_(SECCLASS_CAPABILITY, CAPABILITY__AUDIT_WRITE, "audit_write") - S_(SECCLASS_CAPABILITY, CAPABILITY__AUDIT_CONTROL, "audit_control") - S_(SECCLASS_CAPABILITY, CAPABILITY__SETFCAP, "setfcap") - S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_OVERRIDE, "mac_override") - S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_ADMIN, "mac_admin") - S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_FIREWALL_SOCKET, NETLINK_FIREWALL_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_FIREWALL_SOCKET, NETLINK_FIREWALL_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_TCPDIAG_SOCKET, NETLINK_TCPDIAG_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_TCPDIAG_SOCKET, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_XFRM_SOCKET, NETLINK_XFRM_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_XFRM_SOCKET, NETLINK_XFRM_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_RELAY, "nlmsg_relay") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_READPRIV, "nlmsg_readpriv") - S_(SECCLASS_NETLINK_AUDIT_SOCKET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT, "nlmsg_tty_audit") - S_(SECCLASS_NETLINK_IP6FW_SOCKET, NETLINK_IP6FW_SOCKET__NLMSG_READ, "nlmsg_read") - S_(SECCLASS_NETLINK_IP6FW_SOCKET, NETLINK_IP6FW_SOCKET__NLMSG_WRITE, "nlmsg_write") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, "sendto") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, "recvfrom") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, "setcontext") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, "polmatch") - S_(SECCLASS_PACKET, PACKET__SEND, "send") - S_(SECCLASS_PACKET, PACKET__RECV, "recv") - S_(SECCLASS_PACKET, PACKET__RELABELTO, "relabelto") - S_(SECCLASS_PACKET, PACKET__FLOW_IN, "flow_in") - S_(SECCLASS_PACKET, PACKET__FLOW_OUT, "flow_out") - S_(SECCLASS_PACKET, PACKET__FORWARD_IN, "forward_in") - S_(SECCLASS_PACKET, PACKET__FORWARD_OUT, "forward_out") - S_(SECCLASS_KEY, KEY__VIEW, "view") - S_(SECCLASS_KEY, KEY__READ, "read") - S_(SECCLASS_KEY, KEY__WRITE, "write") - S_(SECCLASS_KEY, KEY__SEARCH, "search") - S_(SECCLASS_KEY, KEY__LINK, "link") - S_(SECCLASS_KEY, KEY__SETATTR, "setattr") - S_(SECCLASS_KEY, KEY__CREATE, "create") - S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NODE_BIND, "node_bind") - S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NAME_CONNECT, "name_connect") - S_(SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, "mmap_zero") - S_(SECCLASS_PEER, PEER__RECV, "recv") - S_(SECCLASS_KERNEL_SERVICE, KERNEL_SERVICE__USE_AS_OVERRIDE, "use_as_override") - S_(SECCLASS_KERNEL_SERVICE, KERNEL_SERVICE__CREATE_FILES_AS, "create_files_as") diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h index 0546d61..fef2582 100644 --- a/security/selinux/include/av_permissions.h +++ b/security/selinux/include/av_permissions.h @@ -423,28 +423,6 @@ #define UNIX_DGRAM_SOCKET__RECV_MSG 0x00080000UL #define UNIX_DGRAM_SOCKET__SEND_MSG 0x00100000UL #define UNIX_DGRAM_SOCKET__NAME_BIND 0x00200000UL -#define TUN_SOCKET__IOCTL 0x00000001UL -#define TUN_SOCKET__READ 0x00000002UL -#define TUN_SOCKET__WRITE 0x00000004UL -#define TUN_SOCKET__CREATE 0x00000008UL -#define TUN_SOCKET__GETATTR 0x00000010UL -#define TUN_SOCKET__SETATTR 0x00000020UL -#define TUN_SOCKET__LOCK 0x00000040UL -#define TUN_SOCKET__RELABELFROM 0x00000080UL -#define TUN_SOCKET__RELABELTO 0x00000100UL -#define TUN_SOCKET__APPEND 0x00000200UL -#define TUN_SOCKET__BIND 0x00000400UL -#define TUN_SOCKET__CONNECT 0x00000800UL -#define TUN_SOCKET__LISTEN 0x00001000UL -#define TUN_SOCKET__ACCEPT 0x00002000UL -#define TUN_SOCKET__GETOPT 0x00004000UL -#define TUN_SOCKET__SETOPT 0x00008000UL -#define TUN_SOCKET__SHUTDOWN 0x00010000UL -#define TUN_SOCKET__RECVFROM 0x00020000UL -#define TUN_SOCKET__SENDTO 0x00040000UL -#define TUN_SOCKET__RECV_MSG 0x00080000UL -#define TUN_SOCKET__SEND_MSG 0x00100000UL -#define TUN_SOCKET__NAME_BIND 0x00200000UL #define PROCESS__FORK 0x00000001UL #define PROCESS__TRANSITION 0x00000002UL #define PROCESS__SIGCHLD 0x00000004UL @@ -868,3 +846,25 @@ #define PEER__RECV 0x00000001UL #define KERNEL_SERVICE__USE_AS_OVERRIDE 0x00000001UL #define KERNEL_SERVICE__CREATE_FILES_AS 0x00000002UL +#define TUN_SOCKET__IOCTL 0x00000001UL +#define TUN_SOCKET__READ 0x00000002UL +#define TUN_SOCKET__WRITE 0x00000004UL +#define TUN_SOCKET__CREATE 0x00000008UL +#define TUN_SOCKET__GETATTR 0x00000010UL +#define TUN_SOCKET__SETATTR 0x00000020UL +#define TUN_SOCKET__LOCK 0x00000040UL +#define TUN_SOCKET__RELABELFROM 0x00000080UL +#define TUN_SOCKET__RELABELTO 0x00000100UL +#define TUN_SOCKET__APPEND 0x00000200UL +#define TUN_SOCKET__BIND 0x00000400UL +#define TUN_SOCKET__CONNECT 0x00000800UL +#define TUN_SOCKET__LISTEN 0x00001000UL +#define TUN_SOCKET__ACCEPT 0x00002000UL +#define TUN_SOCKET__GETOPT 0x00004000UL +#define TUN_SOCKET__SETOPT 0x00008000UL +#define TUN_SOCKET__SHUTDOWN 0x00010000UL +#define TUN_SOCKET__RECVFROM 0x00020000UL +#define TUN_SOCKET__SENDTO 0x00040000UL +#define TUN_SOCKET__RECV_MSG 0x00080000UL +#define TUN_SOCKET__SEND_MSG 0x00100000UL +#define TUN_SOCKET__NAME_BIND 0x00200000UL diff --git a/security/selinux/include/avc_ss.h b/security/selinux/include/avc_ss.h index bb1ec80..4677aa5 100644 --- a/security/selinux/include/avc_ss.h +++ b/security/selinux/include/avc_ss.h @@ -10,26 +10,13 @@ int avc_ss_reset(u32 seqno); -struct av_perm_to_string { - u16 tclass; - u32 value; +/* Class/perm mapping support */ +struct security_class_mapping { const char *name; + const char *perms[sizeof(u32) * 8 + 1]; }; -struct av_inherit { - const char **common_pts; - u32 common_base; - u16 tclass; -}; - -struct selinux_class_perm { - const struct av_perm_to_string *av_perm_to_string; - u32 av_pts_len; - u32 cts_len; - const char **class_to_string; - const struct av_inherit *av_inherit; - u32 av_inherit_len; -}; +extern struct security_class_mapping secclass_map[]; #endif /* _SELINUX_AVC_SS_H_ */ diff --git a/security/selinux/include/class_to_string.h b/security/selinux/include/class_to_string.h deleted file mode 100644 index 7ab9299..0000000 --- a/security/selinux/include/class_to_string.h +++ /dev/null @@ -1,80 +0,0 @@ -/* This file is automatically generated. Do not edit. */ -/* - * Security object class definitions - */ - S_(NULL) - S_("security") - S_("process") - S_("system") - S_("capability") - S_("filesystem") - S_("file") - S_("dir") - S_("fd") - S_("lnk_file") - S_("chr_file") - S_("blk_file") - S_("sock_file") - S_("fifo_file") - S_("socket") - S_("tcp_socket") - S_("udp_socket") - S_("rawip_socket") - S_("node") - S_("netif") - S_("netlink_socket") - S_("packet_socket") - S_("key_socket") - S_("unix_stream_socket") - S_("unix_dgram_socket") - S_("sem") - S_("msg") - S_("msgq") - S_("shm") - S_("ipc") - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_("netlink_route_socket") - S_("netlink_firewall_socket") - S_("netlink_tcpdiag_socket") - S_("netlink_nflog_socket") - S_("netlink_xfrm_socket") - S_("netlink_selinux_socket") - S_("netlink_audit_socket") - S_("netlink_ip6fw_socket") - S_("netlink_dnrt_socket") - S_(NULL) - S_(NULL) - S_("association") - S_("netlink_kobject_uevent_socket") - S_("appletalk_socket") - S_("packet") - S_("key") - S_(NULL) - S_("dccp_socket") - S_("memprotect") - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_("peer") - S_("capability2") - S_(NULL) - S_(NULL) - S_(NULL) - S_(NULL) - S_("kernel_service") - S_("tun_socket") diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h new file mode 100644 index 0000000..8b32e95 --- /dev/null +++ b/security/selinux/include/classmap.h @@ -0,0 +1,150 @@ +#define COMMON_FILE_SOCK_PERMS "ioctl", "read", "write", "create", \ + "getattr", "setattr", "lock", "relabelfrom", "relabelto", "append" + +#define COMMON_FILE_PERMS COMMON_FILE_SOCK_PERMS, "unlink", "link", \ + "rename", "execute", "swapon", "quotaon", "mounton" + +#define COMMON_SOCK_PERMS COMMON_FILE_SOCK_PERMS, "bind", "connect", \ + "listen", "accept", "getopt", "setopt", "shutdown", "recvfrom", \ + "sendto", "recv_msg", "send_msg", "name_bind" + +#define COMMON_IPC_PERMS "create", "destroy", "getattr", "setattr", "read", \ + "write", "associate", "unix_read", "unix_write" + +struct security_class_mapping secclass_map[] = { + { "security", + { "compute_av", "compute_create", "compute_member", + "check_context", "load_policy", "compute_relabel", + "compute_user", "setenforce", "setbool", "setsecparam", + "setcheckreqprot", NULL } }, + { "process", + { "fork", "transition", "sigchld", "sigkill", + "sigstop", "signull", "signal", "ptrace", "getsched", "setsched", + "getsession", "getpgid", "setpgid", "getcap", "setcap", "share", + "getattr", "setexec", "setfscreate", "noatsecure", "siginh", + "setrlimit", "rlimitinh", "dyntransition", "setcurrent", + "execmem", "execstack", "execheap", "setkeycreate", + "setsockcreate", NULL } }, + { "system", + { "ipc_info", "syslog_read", "syslog_mod", + "syslog_console", "module_request", NULL } }, + { "capability", + { "chown", "dac_override", "dac_read_search", + "fowner", "fsetid", "kill", "setgid", "setuid", "setpcap", + "linux_immutable", "net_bind_service", "net_broadcast", + "net_admin", "net_raw", "ipc_lock", "ipc_owner", "sys_module", + "sys_rawio", "sys_chroot", "sys_ptrace", "sys_pacct", "sys_admin", + "sys_boot", "sys_nice", "sys_resource", "sys_time", + "sys_tty_config", "mknod", "lease", "audit_write", + "audit_control", "setfcap", NULL } }, + { "filesystem", + { "mount", "remount", "unmount", "getattr", + "relabelfrom", "relabelto", "transition", "associate", "quotamod", + "quotaget", NULL } }, + { "file", + { COMMON_FILE_PERMS, + "execute_no_trans", "entrypoint", "execmod", "open", NULL } }, + { "dir", + { COMMON_FILE_PERMS, "add_name", "remove_name", + "reparent", "search", "rmdir", "open", NULL } }, + { "fd", { "use", NULL } }, + { "lnk_file", + { COMMON_FILE_PERMS, NULL } }, + { "chr_file", + { COMMON_FILE_PERMS, + "execute_no_trans", "entrypoint", "execmod", "open", NULL } }, + { "blk_file", + { COMMON_FILE_PERMS, "open", NULL } }, + { "sock_file", + { COMMON_FILE_PERMS, "open", NULL } }, + { "fifo_file", + { COMMON_FILE_PERMS, "open", NULL } }, + { "socket", + { COMMON_SOCK_PERMS, NULL } }, + { "tcp_socket", + { COMMON_SOCK_PERMS, + "connectto", "newconn", "acceptfrom", "node_bind", "name_connect", + NULL } }, + { "udp_socket", + { COMMON_SOCK_PERMS, + "node_bind", NULL } }, + { "rawip_socket", + { COMMON_SOCK_PERMS, + "node_bind", NULL } }, + { "node", + { "tcp_recv", "tcp_send", "udp_recv", "udp_send", + "rawip_recv", "rawip_send", "enforce_dest", + "dccp_recv", "dccp_send", "recvfrom", "sendto", NULL } }, + { "netif", + { "tcp_recv", "tcp_send", "udp_recv", "udp_send", + "rawip_recv", "rawip_send", "dccp_recv", "dccp_send", + "ingress", "egress", NULL } }, + { "netlink_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "packet_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "key_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "unix_stream_socket", + { COMMON_SOCK_PERMS, "connectto", "newconn", "acceptfrom", NULL + } }, + { "unix_dgram_socket", + { COMMON_SOCK_PERMS, NULL + } }, + { "sem", + { COMMON_IPC_PERMS, NULL } }, + { "msg", { "send", "receive", NULL } }, + { "msgq", + { COMMON_IPC_PERMS, "enqueue", NULL } }, + { "shm", + { COMMON_IPC_PERMS, "lock", NULL } }, + { "ipc", + { COMMON_IPC_PERMS, NULL } }, + { "netlink_route_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_firewall_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_tcpdiag_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_nflog_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_xfrm_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_selinux_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "netlink_audit_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", "nlmsg_relay", "nlmsg_readpriv", + "nlmsg_tty_audit", NULL } }, + { "netlink_ip6fw_socket", + { COMMON_SOCK_PERMS, + "nlmsg_read", "nlmsg_write", NULL } }, + { "netlink_dnrt_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "association", + { "sendto", "recvfrom", "setcontext", "polmatch", NULL } }, + { "netlink_kobject_uevent_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "appletalk_socket", + { COMMON_SOCK_PERMS, NULL } }, + { "packet", + { "send", "recv", "relabelto", "flow_in", "flow_out", + "forward_in", "forward_out", NULL } }, + { "key", + { "view", "read", "write", "search", "link", "setattr", "create", + NULL } }, + { "dccp_socket", + { COMMON_SOCK_PERMS, + "node_bind", "name_connect", NULL } }, + { "memprotect", { "mmap_zero", NULL } }, + { "peer", { "recv", NULL } }, + { "capability2", { "mac_override", "mac_admin", NULL } }, + { "kernel_service", { "use_as_override", "create_files_as", NULL } }, + { "tun_socket", + { COMMON_SOCK_PERMS, NULL } }, + { NULL } + }; diff --git a/security/selinux/include/common_perm_to_string.h b/security/selinux/include/common_perm_to_string.h deleted file mode 100644 index ce5b6e2f..0000000 --- a/security/selinux/include/common_perm_to_string.h +++ /dev/null @@ -1,58 +0,0 @@ -/* This file is automatically generated. Do not edit. */ -TB_(common_file_perm_to_string) - S_("ioctl") - S_("read") - S_("write") - S_("create") - S_("getattr") - S_("setattr") - S_("lock") - S_("relabelfrom") - S_("relabelto") - S_("append") - S_("unlink") - S_("link") - S_("rename") - S_("execute") - S_("swapon") - S_("quotaon") - S_("mounton") -TE_(common_file_perm_to_string) - -TB_(common_socket_perm_to_string) - S_("ioctl") - S_("read") - S_("write") - S_("create") - S_("getattr") - S_("setattr") - S_("lock") - S_("relabelfrom") - S_("relabelto") - S_("append") - S_("bind") - S_("connect") - S_("listen") - S_("accept") - S_("getopt") - S_("setopt") - S_("shutdown") - S_("recvfrom") - S_("sendto") - S_("recv_msg") - S_("send_msg") - S_("name_bind") -TE_(common_socket_perm_to_string) - -TB_(common_ipc_perm_to_string) - S_("create") - S_("destroy") - S_("getattr") - S_("setattr") - S_("read") - S_("write") - S_("associate") - S_("unix_read") - S_("unix_write") -TE_(common_ipc_perm_to_string) - diff --git a/security/selinux/include/flask.h b/security/selinux/include/flask.h index f248500..5359ca2 100644 --- a/security/selinux/include/flask.h +++ b/security/selinux/include/flask.h @@ -34,26 +34,26 @@ #define SECCLASS_MSGQ 27 #define SECCLASS_SHM 28 #define SECCLASS_IPC 29 -#define SECCLASS_NETLINK_ROUTE_SOCKET 43 -#define SECCLASS_NETLINK_FIREWALL_SOCKET 44 -#define SECCLASS_NETLINK_TCPDIAG_SOCKET 45 -#define SECCLASS_NETLINK_NFLOG_SOCKET 46 -#define SECCLASS_NETLINK_XFRM_SOCKET 47 -#define SECCLASS_NETLINK_SELINUX_SOCKET 48 -#define SECCLASS_NETLINK_AUDIT_SOCKET 49 -#define SECCLASS_NETLINK_IP6FW_SOCKET 50 -#define SECCLASS_NETLINK_DNRT_SOCKET 51 -#define SECCLASS_ASSOCIATION 54 -#define SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET 55 -#define SECCLASS_APPLETALK_SOCKET 56 -#define SECCLASS_PACKET 57 -#define SECCLASS_KEY 58 -#define SECCLASS_DCCP_SOCKET 60 -#define SECCLASS_MEMPROTECT 61 -#define SECCLASS_PEER 68 -#define SECCLASS_CAPABILITY2 69 -#define SECCLASS_KERNEL_SERVICE 74 -#define SECCLASS_TUN_SOCKET 75 +#define SECCLASS_NETLINK_ROUTE_SOCKET 30 +#define SECCLASS_NETLINK_FIREWALL_SOCKET 31 +#define SECCLASS_NETLINK_TCPDIAG_SOCKET 32 +#define SECCLASS_NETLINK_NFLOG_SOCKET 33 +#define SECCLASS_NETLINK_XFRM_SOCKET 34 +#define SECCLASS_NETLINK_SELINUX_SOCKET 35 +#define SECCLASS_NETLINK_AUDIT_SOCKET 36 +#define SECCLASS_NETLINK_IP6FW_SOCKET 37 +#define SECCLASS_NETLINK_DNRT_SOCKET 38 +#define SECCLASS_ASSOCIATION 39 +#define SECCLASS_NETLINK_KOBJECT_UEVENT_SOCKET 40 +#define SECCLASS_APPLETALK_SOCKET 41 +#define SECCLASS_PACKET 42 +#define SECCLASS_KEY 43 +#define SECCLASS_DCCP_SOCKET 44 +#define SECCLASS_MEMPROTECT 45 +#define SECCLASS_PEER 46 +#define SECCLASS_CAPABILITY2 47 +#define SECCLASS_KERNEL_SERVICE 48 +#define SECCLASS_TUN_SOCKET 49 /* * Security identifier indices for initial entities diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index ca83579..2553266 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -97,11 +97,18 @@ struct av_decision { #define AVD_FLAGS_PERMISSIVE 0x0001 int security_compute_av(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - struct av_decision *avd); + u16 tclass, u32 requested, + struct av_decision *avd); + +int security_compute_av_user(u32 ssid, u32 tsid, + u16 tclass, u32 requested, + struct av_decision *avd); int security_transition_sid(u32 ssid, u32 tsid, - u16 tclass, u32 *out_sid); + u16 tclass, u32 *out_sid); + +int security_transition_sid_user(u32 ssid, u32 tsid, + u16 tclass, u32 *out_sid); int security_member_sid(u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index b4fc506..fab36fd 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -522,7 +522,7 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) if (length < 0) goto out2; - length = security_compute_av(ssid, tsid, tclass, req, &avd); + length = security_compute_av_user(ssid, tsid, tclass, req, &avd); if (length < 0) goto out2; @@ -571,7 +571,7 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) if (length < 0) goto out2; - length = security_transition_sid(ssid, tsid, tclass, &newsid); + length = security_transition_sid_user(ssid, tsid, tclass, &newsid); if (length < 0) goto out2; diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index b5407f1..3f2b270 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -532,7 +532,7 @@ int mls_compute_sid(struct context *scontext, } /* Fallthrough */ case AVTAB_CHANGE: - if (tclass == SECCLASS_PROCESS) + if (tclass == policydb.process_class) /* Use the process MLS attributes. */ return mls_context_cpy(newcontext, scontext); else diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 72e4a54..f036672 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -713,7 +713,6 @@ void policydb_destroy(struct policydb *p) ebitmap_destroy(&p->type_attr_map[i]); } kfree(p->type_attr_map); - kfree(p->undefined_perms); ebitmap_destroy(&p->policycaps); ebitmap_destroy(&p->permissive_map); @@ -1640,6 +1639,40 @@ static int policydb_bounds_sanity_check(struct policydb *p) extern int ss_initialized; +u16 string_to_security_class(struct policydb *p, const char *name) +{ + struct class_datum *cladatum; + + cladatum = hashtab_search(p->p_classes.table, name); + if (!cladatum) + return 0; + + return cladatum->value; +} + +u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name) +{ + struct class_datum *cladatum; + struct perm_datum *perdatum = NULL; + struct common_datum *comdatum; + + if (!tclass || tclass > p->p_classes.nprim) + return 0; + + cladatum = p->class_val_to_struct[tclass-1]; + comdatum = cladatum->comdatum; + if (comdatum) + perdatum = hashtab_search(comdatum->permissions.table, + name); + if (!perdatum) + perdatum = hashtab_search(cladatum->permissions.table, + name); + if (!perdatum) + return 0; + + return 1U << (perdatum->value-1); +} + /* * Read the configuration data from a policy database binary * representation file into a policy database structure. @@ -1861,6 +1894,16 @@ int policydb_read(struct policydb *p, void *fp) if (rc) goto bad; + p->process_class = string_to_security_class(p, "process"); + if (!p->process_class) + goto bad; + p->process_trans_perms = string_to_av_perm(p, p->process_class, + "transition"); + p->process_trans_perms |= string_to_av_perm(p, p->process_class, + "dyntransition"); + if (!p->process_trans_perms) + goto bad; + for (i = 0; i < info->ocon_num; i++) { rc = next_entry(buf, fp, sizeof(u32)); if (rc < 0) @@ -2101,7 +2144,7 @@ int policydb_read(struct policydb *p, void *fp) goto bad; rt->target_class = le32_to_cpu(buf[0]); } else - rt->target_class = SECCLASS_PROCESS; + rt->target_class = p->process_class; if (!policydb_type_isvalid(p, rt->source_type) || !policydb_type_isvalid(p, rt->target_type) || !policydb_class_isvalid(p, rt->target_class)) { diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 55152d4..cdcc570 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -254,7 +254,9 @@ struct policydb { unsigned int reject_unknown : 1; unsigned int allow_unknown : 1; - u32 *undefined_perms; + + u16 process_class; + u32 process_trans_perms; }; extern void policydb_destroy(struct policydb *p); @@ -295,5 +297,8 @@ static inline int next_entry(void *buf, struct policy_file *fp, size_t bytes) return 0; } +extern u16 string_to_security_class(struct policydb *p, const char *name); +extern u32 string_to_av_perm(struct policydb *p, u16 tclass, const char *name); + #endif /* _SS_POLICYDB_H_ */ diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index ff17820..e19baa8 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -70,11 +70,6 @@ unsigned int policydb_loaded_version; int selinux_policycap_netpeer; int selinux_policycap_openperm; -/* - * This is declared in avc.c - */ -extern const struct selinux_class_perm selinux_class_perm; - static DEFINE_RWLOCK(policy_rwlock); static struct sidtab sidtab; @@ -98,6 +93,158 @@ static int context_struct_compute_av(struct context *scontext, u16 tclass, u32 requested, struct av_decision *avd); + +struct selinux_mapping { + u16 value; /* policy value */ + unsigned num_perms; + u32 perms[sizeof(u32) * 8]; +}; + +static struct selinux_mapping *current_mapping; +static u16 current_mapping_size; + +static int selinux_set_mapping(struct policydb *pol, + struct security_class_mapping *map, + struct selinux_mapping **out_map_p, + u16 *out_map_size) +{ + struct selinux_mapping *out_map = NULL; + size_t size = sizeof(struct selinux_mapping); + u16 i, j; + unsigned k; + bool print_unknown_handle = false; + + /* Find number of classes in the input mapping */ + if (!map) + return -EINVAL; + i = 0; + while (map[i].name) + i++; + + /* Allocate space for the class records, plus one for class zero */ + out_map = kcalloc(++i, size, GFP_ATOMIC); + if (!out_map) + return -ENOMEM; + + /* Store the raw class and permission values */ + j = 0; + while (map[j].name) { + struct security_class_mapping *p_in = map + (j++); + struct selinux_mapping *p_out = out_map + j; + + /* An empty class string skips ahead */ + if (!strcmp(p_in->name, "")) { + p_out->num_perms = 0; + continue; + } + + p_out->value = string_to_security_class(pol, p_in->name); + if (!p_out->value) { + printk(KERN_INFO + "SELinux: Class %s not defined in policy.\n", + p_in->name); + if (pol->reject_unknown) + goto err; + p_out->num_perms = 0; + print_unknown_handle = true; + continue; + } + + k = 0; + while (p_in->perms && p_in->perms[k]) { + /* An empty permission string skips ahead */ + if (!*p_in->perms[k]) { + k++; + continue; + } + p_out->perms[k] = string_to_av_perm(pol, p_out->value, + p_in->perms[k]); + if (!p_out->perms[k]) { + printk(KERN_INFO + "SELinux: Permission %s in class %s not defined in policy.\n", + p_in->perms[k], p_in->name); + if (pol->reject_unknown) + goto err; + print_unknown_handle = true; + } + + k++; + } + p_out->num_perms = k; + } + + if (print_unknown_handle) + printk(KERN_INFO "SELinux: the above unknown classes and permissions will be %s\n", + pol->allow_unknown ? "allowed" : "denied"); + + *out_map_p = out_map; + *out_map_size = i; + return 0; +err: + kfree(out_map); + return -EINVAL; +} + +/* + * Get real, policy values from mapped values + */ + +static u16 unmap_class(u16 tclass) +{ + if (tclass < current_mapping_size) + return current_mapping[tclass].value; + + return tclass; +} + +static u32 unmap_perm(u16 tclass, u32 tperm) +{ + if (tclass < current_mapping_size) { + unsigned i; + u32 kperm = 0; + + for (i = 0; i < current_mapping[tclass].num_perms; i++) + if (tperm & (1<allowed & current_mapping[tclass].perms[i]) + result |= 1<allowed = result; + + for (i = 0, result = 0; i < n; i++) + if (avd->auditallow & current_mapping[tclass].perms[i]) + result |= 1<auditallow = result; + + for (i = 0, result = 0; i < n; i++) { + if (avd->auditdeny & current_mapping[tclass].perms[i]) + result |= 1<auditdeny = result; + } +} + + /* * Return the boolean value of a constraint expression * when it is applied to the specified source and target @@ -467,7 +614,6 @@ static int context_struct_compute_av(struct context *scontext, struct class_datum *tclass_datum; struct ebitmap *sattr, *tattr; struct ebitmap_node *snode, *tnode; - const struct selinux_class_perm *kdefs = &selinux_class_perm; unsigned int i, j; /* @@ -477,9 +623,9 @@ static int context_struct_compute_av(struct context *scontext, * to remain in the correct class. */ if (policydb_loaded_version < POLICYDB_VERSION_NLCLASS) - if (tclass >= SECCLASS_NETLINK_ROUTE_SOCKET && - tclass <= SECCLASS_NETLINK_DNRT_SOCKET) - tclass = SECCLASS_NETLINK_SOCKET; + if (tclass >= unmap_class(SECCLASS_NETLINK_ROUTE_SOCKET) && + tclass <= unmap_class(SECCLASS_NETLINK_DNRT_SOCKET)) + tclass = unmap_class(SECCLASS_NETLINK_SOCKET); /* * Initialize the access vectors to the default values. @@ -490,33 +636,11 @@ static int context_struct_compute_av(struct context *scontext, avd->seqno = latest_granting; avd->flags = 0; - /* - * Check for all the invalid cases. - * - tclass 0 - * - tclass > policy and > kernel - * - tclass > policy but is a userspace class - * - tclass > policy but we do not allow unknowns - */ - if (unlikely(!tclass)) - goto inval_class; - if (unlikely(tclass > policydb.p_classes.nprim)) - if (tclass > kdefs->cts_len || - !kdefs->class_to_string[tclass] || - !policydb.allow_unknown) - goto inval_class; - - /* - * Kernel class and we allow unknown so pad the allow decision - * the pad will be all 1 for unknown classes. - */ - if (tclass <= kdefs->cts_len && policydb.allow_unknown) - avd->allowed = policydb.undefined_perms[tclass - 1]; - - /* - * Not in policy. Since decision is completed (all 1 or all 0) return. - */ - if (unlikely(tclass > policydb.p_classes.nprim)) - return 0; + if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) { + if (printk_ratelimit()) + printk(KERN_WARNING "SELinux: Invalid class %hu\n", tclass); + return -EINVAL; + } tclass_datum = policydb.class_val_to_struct[tclass - 1]; @@ -568,8 +692,8 @@ static int context_struct_compute_av(struct context *scontext, * role is changing, then check the (current_role, new_role) * pair. */ - if (tclass == SECCLASS_PROCESS && - (avd->allowed & (PROCESS__TRANSITION | PROCESS__DYNTRANSITION)) && + if (tclass == policydb.process_class && + (avd->allowed & policydb.process_trans_perms) && scontext->role != tcontext->role) { for (ra = policydb.role_allow; ra; ra = ra->next) { if (scontext->role == ra->role && @@ -577,8 +701,7 @@ static int context_struct_compute_av(struct context *scontext, break; } if (!ra) - avd->allowed &= ~(PROCESS__TRANSITION | - PROCESS__DYNTRANSITION); + avd->allowed &= ~policydb.process_trans_perms; } /* @@ -590,21 +713,6 @@ static int context_struct_compute_av(struct context *scontext, tclass, requested, avd); return 0; - -inval_class: - if (!tclass || tclass > kdefs->cts_len || - !kdefs->class_to_string[tclass]) { - if (printk_ratelimit()) - printk(KERN_ERR "SELinux: %s: unrecognized class %d\n", - __func__, tclass); - return -EINVAL; - } - - /* - * Known to the kernel, but not to the policy. - * Handle as a denial (allowed is 0). - */ - return 0; } static int security_validtrans_handle_fail(struct context *ocontext, @@ -636,13 +744,14 @@ out: } int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, - u16 tclass) + u16 orig_tclass) { struct context *ocontext; struct context *ncontext; struct context *tcontext; struct class_datum *tclass_datum; struct constraint_node *constraint; + u16 tclass; int rc = 0; if (!ss_initialized) @@ -650,6 +759,8 @@ int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, read_lock(&policy_rwlock); + tclass = unmap_class(orig_tclass); + /* * Remap extended Netlink classes for old policy versions. * Do this here rather than socket_type_to_security_class() @@ -657,9 +768,9 @@ int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, * to remain in the correct class. */ if (policydb_loaded_version < POLICYDB_VERSION_NLCLASS) - if (tclass >= SECCLASS_NETLINK_ROUTE_SOCKET && - tclass <= SECCLASS_NETLINK_DNRT_SOCKET) - tclass = SECCLASS_NETLINK_SOCKET; + if (tclass >= unmap_class(SECCLASS_NETLINK_ROUTE_SOCKET) && + tclass <= unmap_class(SECCLASS_NETLINK_DNRT_SOCKET)) + tclass = unmap_class(SECCLASS_NETLINK_SOCKET); if (!tclass || tclass > policydb.p_classes.nprim) { printk(KERN_ERR "SELinux: %s: unrecognized class %d\n", @@ -792,6 +903,38 @@ out: } +static int security_compute_av_core(u32 ssid, + u32 tsid, + u16 tclass, + u32 requested, + struct av_decision *avd) +{ + struct context *scontext = NULL, *tcontext = NULL; + int rc = 0; + + scontext = sidtab_search(&sidtab, ssid); + if (!scontext) { + printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + __func__, ssid); + return -EINVAL; + } + tcontext = sidtab_search(&sidtab, tsid); + if (!tcontext) { + printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + __func__, tsid); + return -EINVAL; + } + + rc = context_struct_compute_av(scontext, tcontext, tclass, + requested, avd); + + /* permissive domain? */ + if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) + avd->flags |= AVD_FLAGS_PERMISSIVE; + + return rc; +} + /** * security_compute_av - Compute access vector decisions. * @ssid: source security identifier @@ -807,12 +950,45 @@ out: */ int security_compute_av(u32 ssid, u32 tsid, - u16 tclass, - u32 requested, + u16 orig_tclass, + u32 orig_requested, struct av_decision *avd) { - struct context *scontext = NULL, *tcontext = NULL; - int rc = 0; + u16 tclass; + u32 requested; + int rc; + + if (!ss_initialized) + goto allow; + + read_lock(&policy_rwlock); + requested = unmap_perm(orig_tclass, orig_requested); + tclass = unmap_class(orig_tclass); + if (unlikely(orig_tclass && !tclass)) { + if (policydb.allow_unknown) + goto allow; + return -EINVAL; + } + rc = security_compute_av_core(ssid, tsid, tclass, requested, avd); + map_decision(orig_tclass, avd, policydb.allow_unknown); + read_unlock(&policy_rwlock); + return rc; +allow: + avd->allowed = 0xffffffff; + avd->auditallow = 0; + avd->auditdeny = 0xffffffff; + avd->seqno = latest_granting; + avd->flags = 0; + return 0; +} + +int security_compute_av_user(u32 ssid, + u32 tsid, + u16 tclass, + u32 requested, + struct av_decision *avd) +{ + int rc; if (!ss_initialized) { avd->allowed = 0xffffffff; @@ -823,29 +999,7 @@ int security_compute_av(u32 ssid, } read_lock(&policy_rwlock); - - scontext = sidtab_search(&sidtab, ssid); - if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", - __func__, ssid); - rc = -EINVAL; - goto out; - } - tcontext = sidtab_search(&sidtab, tsid); - if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", - __func__, tsid); - rc = -EINVAL; - goto out; - } - - rc = context_struct_compute_av(scontext, tcontext, tclass, - requested, avd); - - /* permissive domain? */ - if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) - avd->flags |= AVD_FLAGS_PERMISSIVE; -out: + rc = security_compute_av_core(ssid, tsid, tclass, requested, avd); read_unlock(&policy_rwlock); return rc; } @@ -1204,20 +1358,22 @@ out: static int security_compute_sid(u32 ssid, u32 tsid, - u16 tclass, + u16 orig_tclass, u32 specified, - u32 *out_sid) + u32 *out_sid, + bool kern) { struct context *scontext = NULL, *tcontext = NULL, newcontext; struct role_trans *roletr = NULL; struct avtab_key avkey; struct avtab_datum *avdatum; struct avtab_node *node; + u16 tclass; int rc = 0; if (!ss_initialized) { - switch (tclass) { - case SECCLASS_PROCESS: + switch (orig_tclass) { + case SECCLASS_PROCESS: /* kernel value */ *out_sid = ssid; break; default: @@ -1231,6 +1387,11 @@ static int security_compute_sid(u32 ssid, read_lock(&policy_rwlock); + if (kern) + tclass = unmap_class(orig_tclass); + else + tclass = orig_tclass; + scontext = sidtab_search(&sidtab, ssid); if (!scontext) { printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", @@ -1260,13 +1421,11 @@ static int security_compute_sid(u32 ssid, } /* Set the role and type to default values. */ - switch (tclass) { - case SECCLASS_PROCESS: + if (tclass == policydb.process_class) { /* Use the current role and type of process. */ newcontext.role = scontext->role; newcontext.type = scontext->type; - break; - default: + } else { /* Use the well-defined object role. */ newcontext.role = OBJECT_R_VAL; /* Use the type of the related object. */ @@ -1297,8 +1456,7 @@ static int security_compute_sid(u32 ssid, } /* Check for class-specific changes. */ - switch (tclass) { - case SECCLASS_PROCESS: + if (tclass == policydb.process_class) { if (specified & AVTAB_TRANSITION) { /* Look for a role transition rule. */ for (roletr = policydb.role_tr; roletr; @@ -1311,9 +1469,6 @@ static int security_compute_sid(u32 ssid, } } } - break; - default: - break; } /* Set the MLS attributes. @@ -1358,7 +1513,17 @@ int security_transition_sid(u32 ssid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, out_sid); + return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, + out_sid, true); +} + +int security_transition_sid_user(u32 ssid, + u32 tsid, + u16 tclass, + u32 *out_sid) +{ + return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, + out_sid, false); } /** @@ -1379,7 +1544,8 @@ int security_member_sid(u32 ssid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, out_sid); + return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, out_sid, + false); } /** @@ -1400,144 +1566,8 @@ int security_change_sid(u32 ssid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, out_sid); -} - -/* - * Verify that each kernel class that is defined in the - * policy is correct - */ -static int validate_classes(struct policydb *p) -{ - int i, j; - struct class_datum *cladatum; - struct perm_datum *perdatum; - u32 nprim, tmp, common_pts_len, perm_val, pol_val; - u16 class_val; - const struct selinux_class_perm *kdefs = &selinux_class_perm; - const char *def_class, *def_perm, *pol_class; - struct symtab *perms; - bool print_unknown_handle = 0; - - if (p->allow_unknown) { - u32 num_classes = kdefs->cts_len; - p->undefined_perms = kcalloc(num_classes, sizeof(u32), GFP_KERNEL); - if (!p->undefined_perms) - return -ENOMEM; - } - - for (i = 1; i < kdefs->cts_len; i++) { - def_class = kdefs->class_to_string[i]; - if (!def_class) - continue; - if (i > p->p_classes.nprim) { - printk(KERN_INFO - "SELinux: class %s not defined in policy\n", - def_class); - if (p->reject_unknown) - return -EINVAL; - if (p->allow_unknown) - p->undefined_perms[i-1] = ~0U; - print_unknown_handle = 1; - continue; - } - pol_class = p->p_class_val_to_name[i-1]; - if (strcmp(pol_class, def_class)) { - printk(KERN_ERR - "SELinux: class %d is incorrect, found %s but should be %s\n", - i, pol_class, def_class); - return -EINVAL; - } - } - for (i = 0; i < kdefs->av_pts_len; i++) { - class_val = kdefs->av_perm_to_string[i].tclass; - perm_val = kdefs->av_perm_to_string[i].value; - def_perm = kdefs->av_perm_to_string[i].name; - if (class_val > p->p_classes.nprim) - continue; - pol_class = p->p_class_val_to_name[class_val-1]; - cladatum = hashtab_search(p->p_classes.table, pol_class); - BUG_ON(!cladatum); - perms = &cladatum->permissions; - nprim = 1 << (perms->nprim - 1); - if (perm_val > nprim) { - printk(KERN_INFO - "SELinux: permission %s in class %s not defined in policy\n", - def_perm, pol_class); - if (p->reject_unknown) - return -EINVAL; - if (p->allow_unknown) - p->undefined_perms[class_val-1] |= perm_val; - print_unknown_handle = 1; - continue; - } - perdatum = hashtab_search(perms->table, def_perm); - if (perdatum == NULL) { - printk(KERN_ERR - "SELinux: permission %s in class %s not found in policy, bad policy\n", - def_perm, pol_class); - return -EINVAL; - } - pol_val = 1 << (perdatum->value - 1); - if (pol_val != perm_val) { - printk(KERN_ERR - "SELinux: permission %s in class %s has incorrect value\n", - def_perm, pol_class); - return -EINVAL; - } - } - for (i = 0; i < kdefs->av_inherit_len; i++) { - class_val = kdefs->av_inherit[i].tclass; - if (class_val > p->p_classes.nprim) - continue; - pol_class = p->p_class_val_to_name[class_val-1]; - cladatum = hashtab_search(p->p_classes.table, pol_class); - BUG_ON(!cladatum); - if (!cladatum->comdatum) { - printk(KERN_ERR - "SELinux: class %s should have an inherits clause but does not\n", - pol_class); - return -EINVAL; - } - tmp = kdefs->av_inherit[i].common_base; - common_pts_len = 0; - while (!(tmp & 0x01)) { - common_pts_len++; - tmp >>= 1; - } - perms = &cladatum->comdatum->permissions; - for (j = 0; j < common_pts_len; j++) { - def_perm = kdefs->av_inherit[i].common_pts[j]; - if (j >= perms->nprim) { - printk(KERN_INFO - "SELinux: permission %s in class %s not defined in policy\n", - def_perm, pol_class); - if (p->reject_unknown) - return -EINVAL; - if (p->allow_unknown) - p->undefined_perms[class_val-1] |= (1 << j); - print_unknown_handle = 1; - continue; - } - perdatum = hashtab_search(perms->table, def_perm); - if (perdatum == NULL) { - printk(KERN_ERR - "SELinux: permission %s in class %s not found in policy, bad policy\n", - def_perm, pol_class); - return -EINVAL; - } - if (perdatum->value != j + 1) { - printk(KERN_ERR - "SELinux: permission %s in class %s has incorrect value\n", - def_perm, pol_class); - return -EINVAL; - } - } - } - if (print_unknown_handle) - printk(KERN_INFO "SELinux: the above unknown classes and permissions will be %s\n", - (security_get_allow_unknown() ? "allowed" : "denied")); - return 0; + return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, out_sid, + false); } /* Clone the SID into the new SID table. */ @@ -1710,8 +1740,10 @@ int security_load_policy(void *data, size_t len) { struct policydb oldpolicydb, newpolicydb; struct sidtab oldsidtab, newsidtab; + struct selinux_mapping *oldmap, *map = NULL; struct convert_context_args args; u32 seqno; + u16 map_size; int rc = 0; struct policy_file file = { data, len }, *fp = &file; @@ -1721,16 +1753,14 @@ int security_load_policy(void *data, size_t len) avtab_cache_destroy(); return -EINVAL; } - if (policydb_load_isids(&policydb, &sidtab)) { + if (selinux_set_mapping(&policydb, secclass_map, + ¤t_mapping, + ¤t_mapping_size)) { policydb_destroy(&policydb); avtab_cache_destroy(); return -EINVAL; } - /* Verify that the kernel defined classes are correct. */ - if (validate_classes(&policydb)) { - printk(KERN_ERR - "SELinux: the definition of a class is incorrect\n"); - sidtab_destroy(&sidtab); + if (policydb_load_isids(&policydb, &sidtab)) { policydb_destroy(&policydb); avtab_cache_destroy(); return -EINVAL; @@ -1759,13 +1789,9 @@ int security_load_policy(void *data, size_t len) return -ENOMEM; } - /* Verify that the kernel defined classes are correct. */ - if (validate_classes(&newpolicydb)) { - printk(KERN_ERR - "SELinux: the definition of a class is incorrect\n"); - rc = -EINVAL; + if (selinux_set_mapping(&newpolicydb, secclass_map, + &map, &map_size)) goto err; - } rc = security_preserve_bools(&newpolicydb); if (rc) { @@ -1799,6 +1825,9 @@ int security_load_policy(void *data, size_t len) memcpy(&policydb, &newpolicydb, sizeof policydb); sidtab_set(&sidtab, &newsidtab); security_load_policycaps(); + oldmap = current_mapping; + current_mapping = map; + current_mapping_size = map_size; seqno = ++latest_granting; policydb_loaded_version = policydb.policyvers; write_unlock_irq(&policy_rwlock); @@ -1806,6 +1835,7 @@ int security_load_policy(void *data, size_t len) /* Free the old policydb and SID table. */ policydb_destroy(&oldpolicydb); sidtab_destroy(&oldsidtab); + kfree(oldmap); avc_ss_reset(seqno); selnl_notify_policyload(seqno); @@ -1815,6 +1845,7 @@ int security_load_policy(void *data, size_t len) return 0; err: + kfree(map); sidtab_destroy(&newsidtab); policydb_destroy(&newpolicydb); return rc; @@ -2091,7 +2122,7 @@ out_unlock: } for (i = 0, j = 0; i < mynel; i++) { rc = avc_has_perm_noaudit(fromsid, mysids[i], - SECCLASS_PROCESS, + SECCLASS_PROCESS, /* kernel value */ PROCESS__TRANSITION, AVC_STRICT, NULL); if (!rc) @@ -2119,10 +2150,11 @@ out: */ int security_genfs_sid(const char *fstype, char *path, - u16 sclass, + u16 orig_sclass, u32 *sid) { int len; + u16 sclass; struct genfs *genfs; struct ocontext *c; int rc = 0, cmp = 0; @@ -2132,6 +2164,8 @@ int security_genfs_sid(const char *fstype, read_lock(&policy_rwlock); + sclass = unmap_class(orig_sclass); + for (genfs = policydb.genfs; genfs; genfs = genfs->next) { cmp = strcmp(fstype, genfs->fstype); if (cmp <= 0) -- cgit v0.10.2 From 8753f6bec352392b52ed9b5e290afb34379f4612 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 30 Sep 2009 13:41:02 -0400 Subject: selinux: generate flask headers during kernel build Add a simple utility (scripts/selinux/genheaders) and invoke it to generate the kernel-private class and permission indices in flask.h and av_permissions.h automatically during the kernel build from the security class mapping definitions in classmap.h. Adding new kernel classes and permissions can then be done just by adding them to classmap.h. Signed-off-by: Stephen Smalley Signed-off-by: James Morris diff --git a/scripts/selinux/Makefile b/scripts/selinux/Makefile index ca4b1ec..e8049da 100644 --- a/scripts/selinux/Makefile +++ b/scripts/selinux/Makefile @@ -1,2 +1,2 @@ -subdir-y := mdp -subdir- += mdp +subdir-y := mdp genheaders +subdir- += mdp genheaders diff --git a/scripts/selinux/genheaders/Makefile b/scripts/selinux/genheaders/Makefile new file mode 100644 index 0000000..417b165 --- /dev/null +++ b/scripts/selinux/genheaders/Makefile @@ -0,0 +1,5 @@ +hostprogs-y := genheaders +HOST_EXTRACFLAGS += -Isecurity/selinux/include + +always := $(hostprogs-y) +clean-files := $(hostprogs-y) diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c new file mode 100644 index 0000000..3b16145 --- /dev/null +++ b/scripts/selinux/genheaders/genheaders.c @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include + +struct security_class_mapping { + const char *name; + const char *perms[sizeof(unsigned) * 8 + 1]; +}; + +#include "classmap.h" +#include "initial_sid_to_string.h" + +#define max(x, y) ((x > y) ? x : y) + +const char *progname; + +void usage(void) +{ + printf("usage: %s flask.h av_permissions.h\n", progname); + exit(1); +} + +char *stoupperx(const char *s) +{ + char *s2 = strdup(s); + char *p; + + if (!s2) { + fprintf(stderr, "%s: out of memory\n", progname); + exit(3); + } + + for (p = s2; *p; p++) + *p = toupper(*p); + return s2; +} + +int main(int argc, char *argv[]) +{ + int i, j, k; + int isids_len; + FILE *fout; + + progname = argv[0]; + + if (argc < 3) + usage(); + + fout = fopen(argv[1], "w"); + if (!fout) { + fprintf(stderr, "Could not open %s for writing: %s\n", + argv[1], strerror(errno)); + exit(2); + } + + for (i = 0; secclass_map[i].name; i++) { + struct security_class_mapping *map = &secclass_map[i]; + map->name = stoupperx(map->name); + for (j = 0; map->perms[j]; j++) + map->perms[j] = stoupperx(map->perms[j]); + } + + isids_len = sizeof(initial_sid_to_string) / sizeof (char *); + for (i = 1; i < isids_len; i++) + initial_sid_to_string[i] = stoupperx(initial_sid_to_string[i]); + + fprintf(fout, "/* This file is automatically generated. Do not edit. */\n"); + fprintf(fout, "#ifndef _SELINUX_FLASK_H_\n#define _SELINUX_FLASK_H_\n\n"); + + for (i = 0; secclass_map[i].name; i++) { + struct security_class_mapping *map = &secclass_map[i]; + fprintf(fout, "#define SECCLASS_%s", map->name); + for (j = 0; j < max(1, 40 - strlen(map->name)); j++) + fprintf(fout, " "); + fprintf(fout, "%2d\n", i+1); + } + + fprintf(fout, "\n"); + + for (i = 1; i < isids_len; i++) { + char *s = initial_sid_to_string[i]; + fprintf(fout, "#define SECINITSID_%s", s); + for (j = 0; j < max(1, 40 - strlen(s)); j++) + fprintf(fout, " "); + fprintf(fout, "%2d\n", i); + } + fprintf(fout, "\n#define SECINITSID_NUM %d\n", i-1); + fprintf(fout, "\n#endif\n"); + fclose(fout); + + fout = fopen(argv[2], "w"); + if (!fout) { + fprintf(stderr, "Could not open %s for writing: %s\n", + argv[2], strerror(errno)); + exit(4); + } + + fprintf(fout, "/* This file is automatically generated. Do not edit. */\n"); + fprintf(fout, "#ifndef _SELINUX_AV_PERMISSIONS_H_\n#define _SELINUX_AV_PERMISSIONS_H_\n\n"); + + for (i = 0; secclass_map[i].name; i++) { + struct security_class_mapping *map = &secclass_map[i]; + for (j = 0; map->perms[j]; j++) { + fprintf(fout, "#define %s__%s", map->name, + map->perms[j]); + for (k = 0; k < max(1, 40 - strlen(map->name) - strlen(map->perms[j])); k++) + fprintf(fout, " "); + fprintf(fout, "0x%08xUL\n", (1< Date: Thu, 1 Oct 2009 14:48:23 -0400 Subject: selinux: drop remapping of netlink classes Drop remapping of netlink classes and bypass of permission checking based on netlink message type for policy version < 18. This removes compatibility code introduced when the original single netlink security class used for all netlink sockets was split into finer-grained netlink classes based on netlink protocol and when permission checking was added based on netlink message type in Linux 2.6.8. The only known distribution that shipped with SELinux and policy < 18 was Fedora Core 2, which was EOL'd on 2005-04-11. Given that the remapping code was never updated to address the addition of newer netlink classes, that the corresponding userland support was dropped in 2005, and that the assumptions made by the remapping code about the fixed ordering among netlink classes in the policy may be violated in the future due to the dynamic class/perm discovery support, we should drop this compatibility code now. Signed-off-by: Stephen Smalley Signed-off-by: James Morris diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a985d0b..a29d661 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -91,7 +91,6 @@ #define NUM_SEL_MNT_OPTS 5 -extern unsigned int policydb_loaded_version; extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); extern struct security_operations *security_ops; @@ -4714,10 +4713,7 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) if (err) return err; - if (policydb_loaded_version >= POLICYDB_VERSION_NLCLASS) - err = selinux_nlmsg_perm(sk, skb); - - return err; + return selinux_nlmsg_perm(sk, skb); } static int selinux_netlink_recv(struct sk_buff *skb, int capability) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index e19baa8..f270e37 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -65,7 +65,6 @@ #include "audit.h" extern void selnl_notify_policyload(u32 seqno); -unsigned int policydb_loaded_version; int selinux_policycap_netpeer; int selinux_policycap_openperm; @@ -617,17 +616,6 @@ static int context_struct_compute_av(struct context *scontext, unsigned int i, j; /* - * Remap extended Netlink classes for old policy versions. - * Do this here rather than socket_type_to_security_class() - * in case a newer policy version is loaded, allowing sockets - * to remain in the correct class. - */ - if (policydb_loaded_version < POLICYDB_VERSION_NLCLASS) - if (tclass >= unmap_class(SECCLASS_NETLINK_ROUTE_SOCKET) && - tclass <= unmap_class(SECCLASS_NETLINK_DNRT_SOCKET)) - tclass = unmap_class(SECCLASS_NETLINK_SOCKET); - - /* * Initialize the access vectors to the default values. */ avd->allowed = 0; @@ -761,17 +749,6 @@ int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, tclass = unmap_class(orig_tclass); - /* - * Remap extended Netlink classes for old policy versions. - * Do this here rather than socket_type_to_security_class() - * in case a newer policy version is loaded, allowing sockets - * to remain in the correct class. - */ - if (policydb_loaded_version < POLICYDB_VERSION_NLCLASS) - if (tclass >= unmap_class(SECCLASS_NETLINK_ROUTE_SOCKET) && - tclass <= unmap_class(SECCLASS_NETLINK_DNRT_SOCKET)) - tclass = unmap_class(SECCLASS_NETLINK_SOCKET); - if (!tclass || tclass > policydb.p_classes.nprim) { printk(KERN_ERR "SELinux: %s: unrecognized class %d\n", __func__, tclass); @@ -1766,7 +1743,6 @@ int security_load_policy(void *data, size_t len) return -EINVAL; } security_load_policycaps(); - policydb_loaded_version = policydb.policyvers; ss_initialized = 1; seqno = ++latest_granting; selinux_complete_init(); @@ -1829,7 +1805,6 @@ int security_load_policy(void *data, size_t len) current_mapping = map; current_mapping_size = map_size; seqno = ++latest_granting; - policydb_loaded_version = policydb.policyvers; write_unlock_irq(&policy_rwlock); /* Free the old policydb and SID table. */ -- cgit v0.10.2 From d6c304055b3cecd4ca865769ac7cea97a320727b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 7 Oct 2009 21:43:22 +0200 Subject: x86, msr: Remove the bkl from msr_open() Remove the big kernel lock from msr_open() as it doesn't protect anything there. The only racy event that can happen here is a concurrent cpu shutdown. So let's look at what could be racy during/after the above event: - The cpu_online() check is racy, but the bkl doesn't help about that anyway it disables preemption but we may be chcking another cpu than the current one. Also the cpu can still become offlined between open and read calls. - The cpu_data(cpu) returns a safe pointer too. It won't be released on cpu offlining. But some fields can be changed from arch/x86/kernel/smpboot.c:remove_siblinginfo() : - phys_proc_id - cpu_core_id Those are not read from msr_open(). What we are checking is the x86_capability that is left untouched on offlining. So this removal looks safe. Signed-off-by: Frederic Weisbecker Cc: John Kacur Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Sven-Thorsten Dietrich LKML-Reference: <1254944602-7382-1-git-send-email-fweisbec@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 7dd9500..c006109 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -174,21 +174,17 @@ static int msr_open(struct inode *inode, struct file *file) { unsigned int cpu = iminor(file->f_path.dentry->d_inode); struct cpuinfo_x86 *c = &cpu_data(cpu); - int ret = 0; - lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (!cpu_has(c, X86_FEATURE_MSR)) - ret = -EIO; /* MSR not supported */ -out: - unlock_kernel(); - return ret; + return -EIO; /* MSR not supported */ + + return 0; } /* -- cgit v0.10.2 From 170a0bc3808909d8ea0f3f9c725c6565efe7f9c4 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Wed, 7 Oct 2009 20:19:32 +0200 Subject: x86, cpuid: Remove the bkl from cpuid_open() Most of the variables are local to the function. It IS possible that for struct cpuinfo_x86 *c c could point to the same area. However, this is used read only. Signed-off-by: John Kacur LKML-Reference: Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index b07af88..ef69284 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -118,8 +118,6 @@ static int cpuid_open(struct inode *inode, struct file *file) struct cpuinfo_x86 *c; int ret = 0; - lock_kernel(); - cpu = iminor(file->f_path.dentry->d_inode); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { ret = -ENXIO; /* No such CPU */ @@ -129,7 +127,6 @@ static int cpuid_open(struct inode *inode, struct file *file) if (c->cpuid_level < 0) ret = -EIO; /* CPUID not supported */ out: - unlock_kernel(); return ret; } -- cgit v0.10.2 From 016e92fbc9ef33689cf654f343a94383d43235e7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 7 Oct 2009 12:47:31 +0200 Subject: perf tools: Unify perf.data mapping and events handling This librarizes the perf.data file mapping and handling in various perf tools, roughly reducing the amount of code and fixing the places that mmap from beginning of the file whereas we want to mmap from the beginning of the data, leading to page fault because the mmap window is too small since the trace info are written in the file too. TODO: - convert perf timechart too Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arjan van de Ven LKML-Reference: <20091007104729.GD5043@nowhere> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 5a42996..495eb6d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -342,6 +342,7 @@ LIB_H += util/values.h LIB_H += util/sort.h LIB_H += util/hist.h LIB_H += util/thread.h +LIB_H += util/data_map.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -378,6 +379,7 @@ LIB_OBJS += util/trace-event-info.o LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o +LIB_OBJS += util/data_map.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 12f8c86..87c4582 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -26,6 +26,7 @@ #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/data_map.h" #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" @@ -37,7 +38,6 @@ static char *dso_list_str, *comm_list_str, *sym_list_str, static struct strlist *dso_list, *comm_list, *sym_list; static int force; -static int input; static int full_paths; static int show_nr_samples; @@ -48,15 +48,11 @@ static struct perf_read_values show_threads_values; static char default_pretty_printing_style[] = "normal"; static char *pretty_printing_style = default_pretty_printing_style; -static unsigned long page_size; -static unsigned long mmap_window = 32; - static int exclude_other = 1; static char callchain_default_opt[] = "fractal,0.5"; -static char __cwd[PATH_MAX]; -static char *cwd = __cwd; +static char *cwd; static int cwdlen; static struct rb_root threads; @@ -815,208 +811,71 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static int -process_event(event_t *event, unsigned long offset, unsigned long head) -{ - trace_event(event); - - switch (event->header.type) { - case PERF_RECORD_SAMPLE: - return process_sample_event(event, offset, head); - - case PERF_RECORD_MMAP: - return process_mmap_event(event, offset, head); - - case PERF_RECORD_COMM: - return process_comm_event(event, offset, head); - - case PERF_RECORD_FORK: - case PERF_RECORD_EXIT: - return process_task_event(event, offset, head); - - case PERF_RECORD_LOST: - return process_lost_event(event, offset, head); - - case PERF_RECORD_READ: - return process_read_event(event, offset, head); - - /* - * We dont process them right now but they are fine: - */ - - case PERF_RECORD_THROTTLE: - case PERF_RECORD_UNTHROTTLE: - return 0; - - default: - return -1; - } - - return 0; -} - -static int __cmd_report(void) +static int sample_type_check(u64 type) { - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head, shift; - struct stat input_stat; - struct thread *idle; - event_t *event; - uint32_t size; - char *buf; - - idle = register_idle_thread(&threads, &last_match); - thread__comm_adjust(idle); - - if (show_threads) - perf_read_values_init(&show_threads_values); - - input = open(input_name, O_RDONLY); - if (input < 0) { - fprintf(stderr, " failed to open file: %s", input_name); - if (!strcmp(input_name, "perf.data")) - fprintf(stderr, " (try 'perf record' first)"); - fprintf(stderr, "\n"); - exit(-1); - } - - ret = fstat(input, &input_stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user or root\n", input_name); - exit(-1); - } - - if (!input_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - - header = perf_header__read(input); - head = header->data_offset; - - sample_type = perf_header__sample_type(header); + sample_type = type; if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { if (sort__has_parent) { fprintf(stderr, "selected --sort parent, but no" " callchain data. Did you call" " perf record without -g?\n"); - exit(-1); + return -1; } if (callchain) { fprintf(stderr, "selected -g but no callchain data." " Did you call perf record without" " -g?\n"); - exit(-1); + return -1; } } else if (callchain_param.mode != CHAIN_NONE && !callchain) { callchain = 1; if (register_callchain_param(&callchain_param) < 0) { fprintf(stderr, "Can't register callchain" " params\n"); - exit(-1); + return -1; } } - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } - - if (!full_paths) { - if (getcwd(__cwd, sizeof(__cwd)) == NULL) { - perror("failed to get the current directory"); - return EXIT_FAILURE; - } - cwdlen = strlen(cwd); - } else { - cwd = NULL; - cwdlen = 0; - } - - shift = page_size * (head / page_size); - offset += shift; - head -= shift; - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - int munmap_ret; - - shift = page_size * (head / page_size); - - munmap_ret = munmap(buf, page_size * mmap_window); - assert(munmap_ret == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - dump_printf("\n%p [%p]: event: %d\n", - (void *)(offset + head), - (void *)(long)event->header.size, - event->header.type); - - if (!size || process_event(event, offset, head) < 0) { - - dump_printf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - - total_unknown++; + return 0; +} - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_mmap_event = process_mmap_event, + .process_comm_event = process_comm_event, + .process_exit_event = process_task_event, + .process_fork_event = process_task_event, + .process_lost_event = process_lost_event, + .process_read_event = process_read_event, + .sample_type_check = sample_type_check, +}; - if (unlikely(head & 7)) - head &= ~7ULL; - size = 8; - } +static int __cmd_report(void) +{ + struct thread *idle; + int ret; - head += size; + idle = register_idle_thread(&threads, &last_match); + thread__comm_adjust(idle); - if (offset + head >= header->data_offset + header->data_size) - goto done; + if (show_threads) + perf_read_values_init(&show_threads_values); - if (offset + head < (unsigned long)input_stat.st_size) - goto more; + register_perf_file_handler(&file_handler); -done: - rc = EXIT_SUCCESS; - close(input); + ret = mmap_dispatch_perf_file(&header, input_name, force, full_paths, + &cwdlen, &cwd); + if (ret) + return ret; dump_printf(" IP events: %10ld\n", total); dump_printf(" mmap events: %10ld\n", total_mmap); dump_printf(" comm events: %10ld\n", total_comm); dump_printf(" fork events: %10ld\n", total_fork); dump_printf(" lost events: %10ld\n", total_lost); - dump_printf(" unknown events: %10ld\n", total_unknown); + dump_printf(" unknown events: %10ld\n", file_handler.total_unknown); if (dump_trace) return 0; @@ -1034,7 +893,7 @@ done: if (show_threads) perf_read_values_destroy(&show_threads_values); - return rc; + return ret; } static int @@ -1177,8 +1036,6 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) { symbol__init(); - page_size = getpagesize(); - argc = parse_options(argc, argv, options, report_usage, 0); setup_sorting(); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 1887138..e1df705 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -11,6 +11,7 @@ #include "util/trace-event.h" #include "util/debug.h" +#include "util/data_map.h" #include #include @@ -20,9 +21,6 @@ #include static char const *input_name = "perf.data"; -static int input; -static unsigned long page_size; -static unsigned long mmap_window = 32; static unsigned long total_comm = 0; @@ -35,6 +33,9 @@ static u64 sample_type; static char default_sort_order[] = "avg, max, switch, runtime"; static char *sort_order = default_sort_order; +static char *cwd; +static int cwdlen; + #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 @@ -1594,129 +1595,43 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } static int -process_event(event_t *event, unsigned long offset, unsigned long head) +process_lost_event(event_t *event __used, + unsigned long offset __used, + unsigned long head __used) { - trace_event(event); - - nr_events++; - switch (event->header.type) { - case PERF_RECORD_MMAP: - return 0; - case PERF_RECORD_LOST: - nr_lost_chunks++; - nr_lost_events += event->lost.lost; - return 0; - - case PERF_RECORD_COMM: - return process_comm_event(event, offset, head); + nr_lost_chunks++; + nr_lost_events += event->lost.lost; - case PERF_RECORD_EXIT ... PERF_RECORD_READ: - return 0; + return 0; +} - case PERF_RECORD_SAMPLE: - return process_sample_event(event, offset, head); +static int sample_type_check(u64 type) +{ + sample_type = type; - case PERF_RECORD_MAX: - default: + if (!(sample_type & PERF_SAMPLE_RAW)) { + fprintf(stderr, + "No trace sample to read. Did you call perf record " + "without -R?"); return -1; } return 0; } +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_comm_event = process_comm_event, + .process_lost_event = process_lost_event, + .sample_type_check = sample_type_check, +}; + static int read_events(void) { - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head = 0; - struct stat perf_stat; - event_t *event; - uint32_t size; - char *buf; - register_idle_thread(&threads, &last_match); + register_perf_file_handler(&file_handler); - input = open(input_name, O_RDONLY); - if (input < 0) { - perror("failed to open file"); - exit(-1); - } - - ret = fstat(input, &perf_stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!perf_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - header = perf_header__read(input); - head = header->data_offset; - sample_type = perf_header__sample_type(header); - - if (!(sample_type & PERF_SAMPLE_RAW)) - die("No trace sample to read. Did you call perf record " - "without -R?"); - - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); - int res; - - res = munmap(buf, page_size * mmap_window); - assert(res == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - - if (!size || process_event(event, offset, head) < 0) { - - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; - } - - head += size; - - if (offset + head < (unsigned long)perf_stat.st_size) - goto more; - - rc = EXIT_SUCCESS; - close(input); - - return rc; + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } static void print_bad_events(void) @@ -1934,7 +1849,6 @@ static int __cmd_record(int argc, const char **argv) int cmd_sched(int argc, const char **argv, const char *prefix __used) { symbol__init(); - page_size = getpagesize(); argc = parse_options(argc, argv, sched_options, sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d9abb4a..fb3f3c2 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -12,11 +12,9 @@ #include "util/debug.h" #include "util/trace-event.h" +#include "util/data_map.h" static char const *input_name = "perf.data"; -static int input; -static unsigned long page_size; -static unsigned long mmap_window = 32; static unsigned long total = 0; static unsigned long total_comm = 0; @@ -27,6 +25,9 @@ static struct thread *last_match; static struct perf_header *header; static u64 sample_type; +static char *cwd; +static int cwdlen; + static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) @@ -112,125 +113,32 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static int -process_event(event_t *event, unsigned long offset, unsigned long head) +static int sample_type_check(u64 type) { - trace_event(event); - - switch (event->header.type) { - case PERF_RECORD_MMAP ... PERF_RECORD_LOST: - return 0; - - case PERF_RECORD_COMM: - return process_comm_event(event, offset, head); - - case PERF_RECORD_EXIT ... PERF_RECORD_READ: - return 0; + sample_type = type; - case PERF_RECORD_SAMPLE: - return process_sample_event(event, offset, head); - - case PERF_RECORD_MAX: - default: + if (!(sample_type & PERF_SAMPLE_RAW)) { + fprintf(stderr, + "No trace sample to read. Did you call perf record " + "without -R?"); return -1; } return 0; } +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_comm_event = process_comm_event, + .sample_type_check = sample_type_check, +}; + static int __cmd_trace(void) { - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head = 0; - unsigned long shift; - struct stat perf_stat; - event_t *event; - uint32_t size; - char *buf; - register_idle_thread(&threads, &last_match); + register_perf_file_handler(&file_handler); - input = open(input_name, O_RDONLY); - if (input < 0) { - perror("failed to open file"); - exit(-1); - } - - ret = fstat(input, &perf_stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!perf_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - header = perf_header__read(input); - head = header->data_offset; - sample_type = perf_header__sample_type(header); - - if (!(sample_type & PERF_SAMPLE_RAW)) - die("No trace sample to read. Did you call perf record " - "without -R?"); - - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } - - shift = page_size * (head / page_size); - offset += shift; - head -= shift; - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - if (head + event->header.size >= page_size * mmap_window) { - int res; - - shift = page_size * (head / page_size); - res = munmap(buf, page_size * mmap_window); - assert(res == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - if (!size || process_event(event, offset, head) < 0) { - - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; - } - - head += size; - - if (offset + head < (unsigned long)perf_stat.st_size) - goto more; - - rc = EXIT_SUCCESS; - close(input); - - return rc; + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } static const char * const annotate_usage[] = { @@ -249,7 +157,6 @@ static const struct option options[] = { int cmd_trace(int argc, const char **argv, const char *prefix __used) { symbol__init(); - page_size = getpagesize(); argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c new file mode 100644 index 0000000..242b055 --- /dev/null +++ b/tools/perf/util/data_map.c @@ -0,0 +1,222 @@ +#include "data_map.h" +#include "symbol.h" +#include "util.h" +#include "debug.h" + + +static struct perf_file_handler *curr_handler; +static unsigned long mmap_window = 32; +static char __cwd[PATH_MAX]; + +static int +process_event_stub(event_t *event __used, + unsigned long offset __used, + unsigned long head __used) +{ + return 0; +} + +void register_perf_file_handler(struct perf_file_handler *handler) +{ + if (!handler->process_sample_event) + handler->process_sample_event = process_event_stub; + if (!handler->process_mmap_event) + handler->process_mmap_event = process_event_stub; + if (!handler->process_comm_event) + handler->process_comm_event = process_event_stub; + if (!handler->process_fork_event) + handler->process_fork_event = process_event_stub; + if (!handler->process_exit_event) + handler->process_exit_event = process_event_stub; + if (!handler->process_lost_event) + handler->process_lost_event = process_event_stub; + if (!handler->process_read_event) + handler->process_read_event = process_event_stub; + if (!handler->process_throttle_event) + handler->process_throttle_event = process_event_stub; + if (!handler->process_unthrottle_event) + handler->process_unthrottle_event = process_event_stub; + + curr_handler = handler; +} + +static int +process_event(event_t *event, unsigned long offset, unsigned long head) +{ + trace_event(event); + + switch (event->header.type) { + case PERF_RECORD_SAMPLE: + return curr_handler->process_sample_event(event, offset, head); + case PERF_RECORD_MMAP: + return curr_handler->process_mmap_event(event, offset, head); + case PERF_RECORD_COMM: + return curr_handler->process_comm_event(event, offset, head); + case PERF_RECORD_FORK: + return curr_handler->process_fork_event(event, offset, head); + case PERF_RECORD_EXIT: + return curr_handler->process_exit_event(event, offset, head); + case PERF_RECORD_LOST: + return curr_handler->process_lost_event(event, offset, head); + case PERF_RECORD_READ: + return curr_handler->process_read_event(event, offset, head); + case PERF_RECORD_THROTTLE: + return curr_handler->process_throttle_event(event, offset, head); + case PERF_RECORD_UNTHROTTLE: + return curr_handler->process_unthrottle_event(event, offset, head); + default: + curr_handler->total_unknown++; + return -1; + } +} + +int mmap_dispatch_perf_file(struct perf_header **pheader, + const char *input_name, + int force, + int full_paths, + int *cwdlen, + char **cwd) +{ + int ret, rc = EXIT_FAILURE; + struct perf_header *header; + unsigned long head, shift; + unsigned long offset = 0; + struct stat input_stat; + size_t page_size; + u64 sample_type; + event_t *event; + uint32_t size; + int input; + char *buf; + + if (!curr_handler) + die("Forgot to register perf file handler"); + + page_size = getpagesize(); + + input = open(input_name, O_RDONLY); + if (input < 0) { + fprintf(stderr, " failed to open file: %s", input_name); + if (!strcmp(input_name, "perf.data")) + fprintf(stderr, " (try 'perf record' first)"); + fprintf(stderr, "\n"); + exit(-1); + } + + ret = fstat(input, &input_stat); + if (ret < 0) { + perror("failed to stat file"); + exit(-1); + } + + if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { + fprintf(stderr, "file: %s not owned by current user or root\n", + input_name); + exit(-1); + } + + if (!input_stat.st_size) { + fprintf(stderr, "zero-sized file, nothing to do!\n"); + exit(0); + } + + *pheader = perf_header__read(input); + header = *pheader; + head = header->data_offset; + + sample_type = perf_header__sample_type(header); + + if (curr_handler->sample_type_check) + if (curr_handler->sample_type_check(sample_type) < 0) + exit(-1); + + if (load_kernel() < 0) { + perror("failed to load kernel symbols"); + return EXIT_FAILURE; + } + + if (!full_paths) { + if (getcwd(__cwd, sizeof(__cwd)) == NULL) { + perror("failed to get the current directory"); + return EXIT_FAILURE; + } + *cwd = __cwd; + *cwdlen = strlen(*cwd); + } else { + *cwd = NULL; + *cwdlen = 0; + } + + shift = page_size * (head / page_size); + offset += shift; + head -= shift; + +remap: + buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, + MAP_SHARED, input, offset); + if (buf == MAP_FAILED) { + perror("failed to mmap file"); + exit(-1); + } + +more: + event = (event_t *)(buf + head); + + size = event->header.size; + if (!size) + size = 8; + + if (head + event->header.size >= page_size * mmap_window) { + int munmap_ret; + + shift = page_size * (head / page_size); + + munmap_ret = munmap(buf, page_size * mmap_window); + assert(munmap_ret == 0); + + offset += shift; + head -= shift; + goto remap; + } + + size = event->header.size; + + dump_printf("\n%p [%p]: event: %d\n", + (void *)(offset + head), + (void *)(long)event->header.size, + event->header.type); + + if (!size || process_event(event, offset, head) < 0) { + + dump_printf("%p [%p]: skipping unknown header type: %d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.type); + + /* + * assume we lost track of the stream, check alignment, and + * increment a single u64 in the hope to catch on again 'soon'. + */ + + if (unlikely(head & 7)) + head &= ~7ULL; + + size = 8; + } + + head += size; + + if (offset + head >= header->data_offset + header->data_size) + goto done; + + if (offset + head < (unsigned long)input_stat.st_size) + goto more; + +done: + rc = EXIT_SUCCESS; + close(input); + + return rc; +} + + diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h new file mode 100644 index 0000000..716d105 --- /dev/null +++ b/tools/perf/util/data_map.h @@ -0,0 +1,31 @@ +#ifndef __PERF_DATAMAP_H +#define __PERF_DATAMAP_H + +#include "event.h" +#include "header.h" + +typedef int (*event_type_handler_t)(event_t *, unsigned long, unsigned long); + +struct perf_file_handler { + event_type_handler_t process_sample_event; + event_type_handler_t process_mmap_event; + event_type_handler_t process_comm_event; + event_type_handler_t process_fork_event; + event_type_handler_t process_exit_event; + event_type_handler_t process_lost_event; + event_type_handler_t process_read_event; + event_type_handler_t process_throttle_event; + event_type_handler_t process_unthrottle_event; + int (*sample_type_check)(u64 sample_type); + unsigned long total_unknown; +}; + +void register_perf_file_handler(struct perf_file_handler *handler); +int mmap_dispatch_perf_file(struct perf_header **pheader, + const char *input_name, + int force, + int full_paths, + int *cwdlen, + char **cwd); + +#endif -- cgit v0.10.2 From 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Oct 2009 16:37:12 +0200 Subject: perf tools: Improve thread comm resolution in perf sched When we get sched traces that involve a task that was already created before opening the event, we won't have the comm event for it. So if we can't find the comm event for a given thread, we look at the traces that may contain these informations. Before: ata/1:371 | 0.000 ms | 1 | avg: 3988.693 ms | max: 3988.693 ms | kondemand/1:421 | 0.096 ms | 3 | avg: 345.346 ms | max: 1035.989 ms | kondemand/0:420 | 0.025 ms | 3 | avg: 421.332 ms | max: 964.014 ms | :5124:5124 | 0.103 ms | 5 | avg: 74.082 ms | max: 277.194 ms | :6244:6244 | 0.691 ms | 9 | avg: 125.655 ms | max: 271.306 ms | firefox:5080 | 0.924 ms | 5 | avg: 53.833 ms | max: 257.828 ms | npviewer.bin:6225 | 21.871 ms | 53 | avg: 22.462 ms | max: 220.835 ms | :6245:6245 | 9.631 ms | 21 | avg: 41.864 ms | max: 213.349 ms | After: ata/1:371 | 0.000 ms | 1 | avg: 3988.693 ms | max: 3988.693 ms | kondemand/1:421 | 0.096 ms | 3 | avg: 345.346 ms | max: 1035.989 ms | kondemand/0:420 | 0.025 ms | 3 | avg: 421.332 ms | max: 964.014 ms | firefox:5124 | 0.103 ms | 5 | avg: 74.082 ms | max: 277.194 ms | npviewer.bin:6244 | 0.691 ms | 9 | avg: 125.655 ms | max: 271.306 ms | firefox:5080 | 0.924 ms | 5 | avg: 53.833 ms | max: 257.828 ms | npviewer.bin:6225 | 21.871 ms | 53 | avg: 22.462 ms | max: 220.835 ms | npviewer.bin:6245 | 9.631 ms | 21 | avg: 41.864 ms | max: 213.349 ms | Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1255012632-7882-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e1df705..25b91e7 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1034,6 +1034,36 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->nb_atoms++; } +static struct thread * +threads__findnew_from_ctx(u32 pid, struct trace_switch_event *switch_event) +{ + struct thread *th; + + th = threads__findnew_nocomm(pid, &threads, &last_match); + if (th->comm) + return th; + + if (pid == switch_event->prev_pid) + thread__set_comm(th, switch_event->prev_comm); + else + thread__set_comm(th, switch_event->next_comm); + return th; +} + +static struct thread * +threads__findnew_from_wakeup(struct trace_wakeup_event *wakeup_event) +{ + struct thread *th; + + th = threads__findnew_nocomm(wakeup_event->pid, &threads, &last_match); + if (th->comm) + return th; + + thread__set_comm(th, wakeup_event->comm); + + return th; +} + static void latency_switch_event(struct trace_switch_event *switch_event, struct event *event __used, @@ -1059,8 +1089,10 @@ latency_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %Ld < 0 ?\n", delta); - sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); - sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); + sched_out = threads__findnew_from_ctx(switch_event->prev_pid, + switch_event); + sched_in = threads__findnew_from_ctx(switch_event->next_pid, + switch_event); out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); if (!out_events) { @@ -1126,7 +1158,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, if (!wakeup_event->success) return; - wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); + wakee = threads__findnew_from_wakeup(wakeup_event); atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); if (!atoms) { thread_atoms_insert(wakee); @@ -1386,8 +1418,10 @@ map_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %Ld < 0 ?\n", delta); - sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); - sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); + sched_out = threads__findnew_from_ctx(switch_event->prev_pid, + switch_event); + sched_in = threads__findnew_from_ctx(switch_event->next_pid, + switch_event); curr_thread[this_cpu] = sched_in; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 3b56aeb..8bd5ca2 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -6,15 +6,17 @@ #include "util.h" #include "debug.h" -static struct thread *thread__new(pid_t pid) +static struct thread *thread__new(pid_t pid, int set_comm) { struct thread *self = calloc(1, sizeof(*self)); if (self != NULL) { self->pid = pid; - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->pid); + if (set_comm) { + self->comm = malloc(32); + if (self->comm) + snprintf(self->comm, 32, ":%d", self->pid); + } self->maps = RB_ROOT; INIT_LIST_HEAD(&self->removed_maps); } @@ -50,8 +52,10 @@ static size_t thread__fprintf(struct thread *self, FILE *fp) return ret; } -struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) +static struct thread * +__threads__findnew(pid_t pid, struct rb_root *threads, + struct thread **last_match, + int set_comm) { struct rb_node **p = &threads->rb_node; struct rb_node *parent = NULL; @@ -80,7 +84,8 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) p = &(*p)->rb_right; } - th = thread__new(pid); + th = thread__new(pid, set_comm); + if (th != NULL) { rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, threads); @@ -91,6 +96,19 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) } struct thread * +threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) +{ + return __threads__findnew(pid, threads, last_match, 1); +} + +struct thread * +threads__findnew_nocomm(pid_t pid, struct rb_root *threads, + struct thread **last_match) +{ + return __threads__findnew(pid, threads, last_match, 0); +} + +struct thread * register_idle_thread(struct rb_root *threads, struct thread **last_match) { struct thread *thread = threads__findnew(0, threads, last_match); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 845d9b6..75bc843 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -18,6 +18,9 @@ int thread__set_comm(struct thread *self, const char *comm); struct thread * threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); struct thread * +threads__findnew_nocomm(pid_t pid, struct rb_root *threads, + struct thread **last_match); +struct thread * register_idle_thread(struct rb_root *threads, struct thread **last_match); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); -- cgit v0.10.2 From da21d1b547cbaa2c026cf645753651c25d340923 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 7 Oct 2009 10:49:00 -0300 Subject: perf tools: Up the verbose level for some really verbose stuff Like printing every symbol created. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1254923340-4870-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 35ed97b..8c84320 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -658,10 +658,10 @@ more: if (dump_trace) return 0; - if (verbose >= 3) + if (verbose > 3) threads__fprintf(stdout, &threads); - if (verbose >= 2) + if (verbose > 2) dsos__fprintf(stdout); collapse__resort(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 87c4582..f57a23b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -880,10 +880,10 @@ static int __cmd_report(void) if (dump_trace) return 0; - if (verbose >= 3) + if (verbose > 3) threads__fprintf(stdout, &threads); - if (verbose >= 2) + if (verbose > 2) dsos__fprintf(stdout); collapse__resort(); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 582ce72..a6887f9 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -74,7 +74,7 @@ static struct symbol *symbol__new(u64 start, u64 len, const char *name, if (!self) return NULL; - if (v >= 2) + if (v > 2) printf("new symbol: %016Lx [%08lx]: %s, hist: %p\n", start, (unsigned long)len, name, self->hist); @@ -685,7 +685,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, } if (self->adjust_symbols) { - if (v >= 2) + if (v > 2) printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); -- cgit v0.10.2 From 2e538c4a1847291cf01218d4fe7bb4dc60fef7cf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 7 Oct 2009 13:48:56 -0300 Subject: perf tools: Improve kernel/modules symbol lookup This removes the ovelapping of vmlinux addresses with modules, using the ELF section name when using --vmlinux and creating a unique DSO name when using /proc/kallsyms ([kernel].N). This is done by creating multiple 'struct map' instances for address ranges backed by DSOs that have just the symbols for that range and a name that is derived from the ELF section name.o Now it is possible to ask for just the symbols in some particular kernel section: $ perf report -m --vmlinux ../build/tip-recvmmsg/vmlinux \ --dsos [kernel].vsyscall_fn | head -15 52.73% Xorg [.] vread_hpet 18.61% firefox [.] vread_hpet 14.50% npviewer.bin [.] vread_hpet 6.83% compiz [.] vread_hpet 5.73% glxgears [.] vread_hpet 0.63% java [.] vread_hpet 0.30% gnome-terminal [.] vread_hpet 0.23% perf [.] vread_hpet 0.18% xchat [.] vread_hpet $ Now we don't have to first lookup the list of modules and then, if it fails, vmlinux symbols, its just a simple lookup for the map then the symbols, just like for threads. Reports generated using /proc/kallsyms and --vmlinux should provide the same results, modulo the DSO name for sections other than ".text". But they don't right now because things like: ffffffff81011c20-ffffffff81012068 system_call ffffffff81011c30-ffffffff81011c9b system_call_after_swapgs ffffffff81011c9c-ffffffff81011cb6 system_call_fastpath ffffffff81011cb7-ffffffff81011cbb ret_from_sys_call I.e. overlapping symbols, again some ASM special case that we have to fixup. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1254934136-8503-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a6887f9..faa84f5 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -26,27 +26,35 @@ enum dso_origin { static void dsos__add(struct dso *dso); static struct dso *dsos__find(const char *name); +static struct map *map__new2(u64 start, struct dso *dso); +static void kernel_maps__insert(struct map *map); static struct rb_root kernel_maps; -static void dso__set_symbols_end(struct dso *self) +static void dso__fixup_sym_end(struct dso *self) { struct rb_node *nd, *prevnd = rb_first(&self->syms); + struct symbol *curr, *prev; if (prevnd == NULL) return; + curr = rb_entry(prevnd, struct symbol, rb_node); + for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { - struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node), - *curr = rb_entry(nd, struct symbol, rb_node); + prev = curr; + curr = rb_entry(nd, struct symbol, rb_node); if (prev->end == prev->start) prev->end = curr->start - 1; - prevnd = nd; } + + /* Last entry */ + if (curr->end == curr->start) + curr->end = roundup(curr->start, 4096); } -static void kernel_maps__fixup_sym_end(void) +static void kernel_maps__fixup_end(void) { struct map *prev, *curr; struct rb_node *nd, *prevnd = rb_first(&kernel_maps); @@ -55,13 +63,17 @@ static void kernel_maps__fixup_sym_end(void) return; curr = rb_entry(prevnd, struct map, rb_node); - dso__set_symbols_end(curr->dso); for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { prev = curr; curr = rb_entry(nd, struct map, rb_node); prev->end = curr->start - 1; - dso__set_symbols_end(curr->dso); + } + + nd = rb_last(&curr->dso->syms); + if (nd) { + struct symbol *sym = rb_entry(nd, struct symbol, rb_node); + curr->end = sym->end; } } @@ -200,13 +212,16 @@ size_t dso__fprintf(struct dso *self, FILE *fp) return ret; } -static int maps__load_kallsyms(symbol_filter_t filter, int use_modules, int v) +/* + * Loads the function entries in /proc/kallsyms into kernel_map->dso, + * so that we can in the next step set the symbol ->end address and then + * call kernel_maps__split_kallsyms. + */ +static int kernel_maps__load_all_kallsyms(int v) { - struct map *map = kernel_map; char *line = NULL; size_t n; FILE *file = fopen("/proc/kallsyms", "r"); - int count = 0; if (file == NULL) goto out_failure; @@ -216,7 +231,7 @@ static int maps__load_kallsyms(symbol_filter_t filter, int use_modules, int v) struct symbol *sym; int line_len, len; char symbol_type; - char *module, *symbol_name; + char *symbol_name; line_len = getline(&line, &n, file); if (line_len < 0) @@ -241,20 +256,55 @@ static int maps__load_kallsyms(symbol_filter_t filter, int use_modules, int v) continue; symbol_name = line + len + 2; - module = strchr(symbol_name, '\t'); - if (module) { - char *module_name_end; + /* + * Will fix up the end later, when we have all symbols sorted. + */ + sym = symbol__new(start, 0, symbol_name, + kernel_map->dso->sym_priv_size, v); + if (sym == NULL) + goto out_delete_line; + + dso__insert_symbol(kernel_map->dso, sym); + } + + free(line); + fclose(file); + + return 0; + +out_delete_line: + free(line); +out_failure: + return -1; +} + +/* + * Split the symbols into maps, making sure there are no overlaps, i.e. the + * kernel range is broken in several maps, named [kernel].N, as we don't have + * the original ELF section names vmlinux have. + */ +static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) +{ + struct map *map = kernel_map; + struct symbol *pos; + int count = 0; + struct rb_node *next = rb_first(&kernel_map->dso->syms); + int kernel_range = 0; + + while (next) { + char *module; + + pos = rb_entry(next, struct symbol, rb_node); + next = rb_next(&pos->rb_node); + + module = strchr(pos->name, '\t'); + if (module) { if (!use_modules) - continue; - *module = '\0'; - module = strchr(module + 1, '['); - if (!module) - continue; - module_name_end = strchr(module + 1, ']'); - if (!module_name_end) - continue; - *(module_name_end + 1) = '\0'; + goto delete_symbol; + + *module++ = '\0'; + if (strcmp(map->dso->name, module)) { map = kernel_maps__find_by_dso_name(module); if (!map) { @@ -263,50 +313,77 @@ static int maps__load_kallsyms(symbol_filter_t filter, int use_modules, int v) return -1; } } - start = map->map_ip(map, start); - } else - map = kernel_map; - /* - * Well fix up the end later, when we have all sorted. - */ - sym = symbol__new(start, 0, symbol_name, - map->dso->sym_priv_size, v); + /* + * So that we look just like we get from .ko files, + * i.e. not prelinked, relative to map->start. + */ + pos->start = map->map_ip(map, pos->start); + pos->end = map->map_ip(map, pos->end); + } else if (map != kernel_map) { + char dso_name[PATH_MAX]; + struct dso *dso; + + snprintf(dso_name, sizeof(dso_name), "[kernel].%d", + kernel_range++); + + dso = dso__new(dso_name, + kernel_map->dso->sym_priv_size); + if (dso == NULL) + return -1; + + map = map__new2(pos->start, dso); + if (map == NULL) { + dso__delete(dso); + return -1; + } - if (sym == NULL) - goto out_delete_line; + map->map_ip = vdso__map_ip; + kernel_maps__insert(map); + ++kernel_range; + } - if (filter && filter(map, sym)) - symbol__delete(sym, map->dso->sym_priv_size); - else { - dso__insert_symbol(map->dso, sym); + if (filter && filter(map, pos)) { +delete_symbol: + rb_erase(&pos->rb_node, &kernel_map->dso->syms); + symbol__delete(pos, kernel_map->dso->sym_priv_size); + } else { + if (map != kernel_map) { + rb_erase(&pos->rb_node, &kernel_map->dso->syms); + dso__insert_symbol(map->dso, pos); + } count++; } } - free(line); - fclose(file); - return count; +} -out_delete_line: - free(line); -out_failure: - return -1; + +static int kernel_maps__load_kallsyms(symbol_filter_t filter, + int use_modules, int v) +{ + if (kernel_maps__load_all_kallsyms(v)) + return -1; + + dso__fixup_sym_end(kernel_map->dso); + + return kernel_maps__split_kallsyms(filter, use_modules); } -static size_t kernel_maps__fprintf(FILE *fp) +static size_t kernel_maps__fprintf(FILE *fp, int v) { size_t printed = fprintf(stderr, "Kernel maps:\n"); struct rb_node *nd; - printed += map__fprintf(kernel_map, fp); - printed += dso__fprintf(kernel_map->dso, fp); - for (nd = rb_first(&kernel_maps); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); + printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); - printed += dso__fprintf(pos->dso, fp); + if (v > 1) { + printed += dso__fprintf(pos->dso, fp); + printed += fprintf(fp, "--\n"); + } } return printed + fprintf(stderr, "END kernel maps\n"); @@ -594,6 +671,9 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, int fd, symbol_filter_t filter, int kernel, int kmodule, int v) { + struct map *curr_map = map; + struct dso *curr_dso = self; + size_t dso_name_len = strlen(self->short_name); Elf_Data *symstrs, *secstrs; uint32_t nr_syms; int err = -1; @@ -660,10 +740,9 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { struct symbol *f; const char *elf_name; - char *demangled; + char *demangled = NULL; int is_label = elf_sym__is_label(&sym); const char *section_name; - u64 sh_offset = 0; if (!is_label && !elf_sym__is_function(&sym)) continue; @@ -677,14 +756,51 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, if (is_label && !elf_sec__is_text(&shdr, secstrs)) continue; + elf_name = elf_sym__name(&sym, symstrs); section_name = elf_sec__name(&shdr, secstrs); - if ((kernel || kmodule)) { - if (strstr(section_name, ".init")) - sh_offset = shdr.sh_offset; + if (kernel || kmodule) { + char dso_name[PATH_MAX]; + + if (strcmp(section_name, + curr_dso->short_name + dso_name_len) == 0) + goto new_symbol; + + if (strcmp(section_name, ".text") == 0) { + curr_map = map; + curr_dso = self; + goto new_symbol; + } + + snprintf(dso_name, sizeof(dso_name), + "%s%s", self->short_name, section_name); + + curr_map = kernel_maps__find_by_dso_name(dso_name); + if (curr_map == NULL) { + u64 start = sym.st_value; + + if (kmodule) + start += map->start + shdr.sh_offset; + + curr_dso = dso__new(dso_name, self->sym_priv_size); + if (curr_dso == NULL) + goto out_elf_end; + curr_map = map__new2(start, curr_dso); + if (curr_map == NULL) { + dso__delete(curr_dso); + goto out_elf_end; + } + curr_map->map_ip = vdso__map_ip; + curr_dso->origin = DSO__ORIG_KERNEL; + kernel_maps__insert(curr_map); + dsos__add(curr_dso); + } else + curr_dso = curr_map->dso; + + goto new_symbol; } - if (self->adjust_symbols) { + if (curr_dso->adjust_symbols) { if (v > 2) printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); @@ -696,25 +812,29 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, * DWARF DW_compile_unit has this, but we don't always have access * to it... */ - elf_name = elf_sym__name(&sym, symstrs); demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); if (demangled != NULL) elf_name = demangled; - - f = symbol__new(sym.st_value + sh_offset, sym.st_size, elf_name, - self->sym_priv_size, v); +new_symbol: + f = symbol__new(sym.st_value, sym.st_size, elf_name, + curr_dso->sym_priv_size, v); free(demangled); if (!f) goto out_elf_end; - if (filter && filter(map, f)) - symbol__delete(f, self->sym_priv_size); + if (filter && filter(curr_map, f)) + symbol__delete(f, curr_dso->sym_priv_size); else { - dso__insert_symbol(self, f); + dso__insert_symbol(curr_dso, f); nr++; } } + /* + * For misannotated, zeroed, ASM function sizes. + */ + if (nr > 0) + dso__fixup_sym_end(self); err = nr; out_elf_end: elf_end(elf); @@ -883,27 +1003,17 @@ static void kernel_maps__insert(struct map *map) struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp) { - /* - * We can't have kernel_map in kernel_maps because it spans an address - * space that includes the modules. The right way to fix this is to - * create several maps, so that we don't have overlapping ranges with - * modules. For now lets look first on the kernel dso. - */ struct map *map = maps__find(&kernel_maps, ip); - struct symbol *sym; + + if (mapp) + *mapp = map; if (map) { ip = map->map_ip(map, ip); - sym = map->dso->find_symbol(map->dso, ip); - } else { - map = kernel_map; - sym = map->dso->find_symbol(map->dso, ip); + return map->dso->find_symbol(map->dso, ip); } - if (mapp) - *mapp = map; - - return sym; + return NULL; } struct map *kernel_maps__find_by_dso_name(const char *name) @@ -994,6 +1104,14 @@ static int dsos__load_modules_sym_dir(char *dirname, last = rb_last(&map->dso->syms); if (last) { struct symbol *sym; + /* + * We do this here as well, even having the + * symbol size found in the symtab because + * misannotated ASM symbols may have the size + * set to zero. + */ + dso__fixup_sym_end(map->dso); + sym = rb_entry(last, struct symbol, rb_node); map->end = map->start + sym->end; } @@ -1163,17 +1281,11 @@ int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, } if (err <= 0) - err = maps__load_kallsyms(filter, use_modules, v); + err = kernel_maps__load_kallsyms(filter, use_modules, v); if (err > 0) { struct rb_node *node = rb_first(&dso->syms); struct symbol *sym = rb_entry(node, struct symbol, rb_node); - /* - * Now that we have all sorted out, just set the ->end of all - * symbols that still don't have it. - */ - dso__set_symbols_end(dso); - kernel_maps__fixup_sym_end(); kernel_map->start = sym->start; node = rb_last(&dso->syms); @@ -1181,14 +1293,16 @@ int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, kernel_map->end = sym->end; dso->origin = DSO__ORIG_KERNEL; + kernel_maps__insert(kernel_map); /* - * XXX See kernel_maps__find_symbol comment - * kernel_maps__insert(kernel_map) + * Now that we have all sorted out, just set the ->end of all + * maps: */ + kernel_maps__fixup_end(); dsos__add(dso); if (v > 0) - kernel_maps__fprintf(stderr); + kernel_maps__fprintf(stderr, v); } return err; -- cgit v0.10.2 From 97ea1a7fa62af0d8d49a0fc12796b0073537c9d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Oct 2009 21:04:17 +0200 Subject: perf tools: Fix thread comm resolution in perf sched This reverts commit 9a92b479b2f088ee2d3194243f4c8e59b1b8c9c2 ("perf tools: Improve thread comm resolution in perf sched") and fixes the real bug. The bug was elsewhere: We are failing to resolve thread names in perf sched because the table of threads we are building, on top of comm events, has a per process granularity. But perf sched, unlike the other perf tools, needs a per thread granularity as we are profiling every tasks individually. So fix it by building our threads table using the tid instead of the pid as the thread identifier. v2: Revert the previous fix - it is not really needed Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1255028657-11158-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 25b91e7..6b00529 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -638,7 +638,7 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) { struct thread *thread; - thread = threads__findnew(event->comm.pid, &threads, &last_match); + thread = threads__findnew(event->comm.tid, &threads, &last_match); dump_printf("%p [%p]: perf_event_comm: %s:%d\n", (void *)(offset + head), @@ -1034,36 +1034,6 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->nb_atoms++; } -static struct thread * -threads__findnew_from_ctx(u32 pid, struct trace_switch_event *switch_event) -{ - struct thread *th; - - th = threads__findnew_nocomm(pid, &threads, &last_match); - if (th->comm) - return th; - - if (pid == switch_event->prev_pid) - thread__set_comm(th, switch_event->prev_comm); - else - thread__set_comm(th, switch_event->next_comm); - return th; -} - -static struct thread * -threads__findnew_from_wakeup(struct trace_wakeup_event *wakeup_event) -{ - struct thread *th; - - th = threads__findnew_nocomm(wakeup_event->pid, &threads, &last_match); - if (th->comm) - return th; - - thread__set_comm(th, wakeup_event->comm); - - return th; -} - static void latency_switch_event(struct trace_switch_event *switch_event, struct event *event __used, @@ -1089,10 +1059,8 @@ latency_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %Ld < 0 ?\n", delta); - sched_out = threads__findnew_from_ctx(switch_event->prev_pid, - switch_event); - sched_in = threads__findnew_from_ctx(switch_event->next_pid, - switch_event); + sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); + sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); if (!out_events) { @@ -1158,7 +1126,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, if (!wakeup_event->success) return; - wakee = threads__findnew_from_wakeup(wakeup_event); + wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); if (!atoms) { thread_atoms_insert(wakee); @@ -1418,10 +1386,8 @@ map_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %Ld < 0 ?\n", delta); - sched_out = threads__findnew_from_ctx(switch_event->prev_pid, - switch_event); - sched_in = threads__findnew_from_ctx(switch_event->next_pid, - switch_event); + sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); + sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); curr_thread[this_cpu] = sched_in; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 8bd5ca2..3b56aeb 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -6,17 +6,15 @@ #include "util.h" #include "debug.h" -static struct thread *thread__new(pid_t pid, int set_comm) +static struct thread *thread__new(pid_t pid) { struct thread *self = calloc(1, sizeof(*self)); if (self != NULL) { self->pid = pid; - if (set_comm) { - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->pid); - } + self->comm = malloc(32); + if (self->comm) + snprintf(self->comm, 32, ":%d", self->pid); self->maps = RB_ROOT; INIT_LIST_HEAD(&self->removed_maps); } @@ -52,10 +50,8 @@ static size_t thread__fprintf(struct thread *self, FILE *fp) return ret; } -static struct thread * -__threads__findnew(pid_t pid, struct rb_root *threads, - struct thread **last_match, - int set_comm) +struct thread * +threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) { struct rb_node **p = &threads->rb_node; struct rb_node *parent = NULL; @@ -84,8 +80,7 @@ __threads__findnew(pid_t pid, struct rb_root *threads, p = &(*p)->rb_right; } - th = thread__new(pid, set_comm); - + th = thread__new(pid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, threads); @@ -96,19 +91,6 @@ __threads__findnew(pid_t pid, struct rb_root *threads, } struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) -{ - return __threads__findnew(pid, threads, last_match, 1); -} - -struct thread * -threads__findnew_nocomm(pid_t pid, struct rb_root *threads, - struct thread **last_match) -{ - return __threads__findnew(pid, threads, last_match, 0); -} - -struct thread * register_idle_thread(struct rb_root *threads, struct thread **last_match) { struct thread *thread = threads__findnew(0, threads, last_match); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 75bc843..845d9b6 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -18,9 +18,6 @@ int thread__set_comm(struct thread *self, const char *comm); struct thread * threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); struct thread * -threads__findnew_nocomm(pid_t pid, struct rb_root *threads, - struct thread **last_match); -struct thread * register_idle_thread(struct rb_root *threads, struct thread **last_match); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); -- cgit v0.10.2 From 26dd2cb074d9dc41c9e3cddd7bf175fd0a41febc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 8 Oct 2009 22:07:29 +0200 Subject: perf tools: Provide backward compatibility with previous perf.data version We have merged the trace.info file into perf.data by adding one section in the perf headers. This makes it incompatible with previous version: the new perf tools can't read the older perf.data. To support the previous format, we check the headers size. If they have the same size than in the previous format, then ignore the trace info section that doesn't exist. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1255032449-12022-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 212fade..9aae360 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -287,10 +287,16 @@ struct perf_header *perf_header__read(int fd) do_read(fd, &f_header, sizeof(f_header)); if (f_header.magic != PERF_MAGIC || - f_header.size != sizeof(f_header) || f_header.attr_size != sizeof(f_attr)) die("incompatible file format"); + if (f_header.size != sizeof(f_header)) { + /* Support the previous format */ + if (f_header.size == offsetof(typeof(f_header), trace_info)) + f_header.trace_info.size = 0; + else + die("incompatible file format"); + } nr_attrs = f_header.attrs.size / sizeof(f_attr); lseek(fd, f_header.attrs.offset, SEEK_SET); -- cgit v0.10.2 From 5a943617ef52e9f79cd7cf437aad8870be27aabb Mon Sep 17 00:00:00 2001 From: John Kacur Date: Thu, 8 Oct 2009 17:20:15 +0200 Subject: x86, cpuid: Simplify the code in cpuid_open Peter picked up my patch for tip/x86/cpu that removes the bkl in cpuid_open. Ingo subsequently merged that into tip/master. This patch folds back in tglx's 55968ede164ae523692f00717f50cd926f1382a0 to my patch that removed the bkl. This simplifies the code, and makes it consistent with the changes to kill the bkl in msr.c as well. Originally-by: Thomas Gleixner Signed-off-by: John Kacur Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index ef69284..48e8e65 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -116,18 +116,16 @@ static int cpuid_open(struct inode *inode, struct file *file) { unsigned int cpu; struct cpuinfo_x86 *c; - int ret = 0; cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - ret = -ENXIO; /* No such CPU */ - goto out; - } + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; /* No such CPU */ + c = &cpu_data(cpu); if (c->cpuid_level < 0) - ret = -EIO; /* CPUID not supported */ -out: - return ret; + return -EIO; /* CPUID not supported */ + + return 0; } /* -- cgit v0.10.2 From 04a705df47d1ea27ca2b066f24b1951c51792d0d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 6 Oct 2009 16:42:08 +0200 Subject: perf_events: Check for filters on fixed counter events Intel fixed counters do not support all the filters possible with a generic counter. Thus, if a fixed counter event is passed but with certain filters set, then the fixed_mode_idx() function must fail and the event must be measured in a generic counter instead. Reject filters are: inv, edge, cnt-mask. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <1254840129-6198-2-git-send-email-eranian@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index ad7ce3f..8d9f854 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -28,9 +28,20 @@ */ #define ARCH_PERFMON_EVENT_MASK 0xffff +/* + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. + */ +#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 + #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c3..1d16bd6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1349,6 +1349,12 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) if (!x86_pmu.num_events_fixed) return -1; + /* + * fixed counters do not take all possible filters + */ + if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) + return -1; + if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) -- cgit v0.10.2 From b690081d4d3f6a23541493f1682835c3cd5c54a1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 6 Oct 2009 16:42:09 +0200 Subject: perf_events: Add event constraints support for Intel processors On some Intel processors, not all events can be measured in all counters. Some events can only be measured in one particular counter, for instance. Assigning an event to the wrong counter does not crash the machine but this yields bogus counts, i.e., silent error. This patch changes the event to counter assignment logic to take into account event constraints for Intel P6, Core and Nehalem processors. There is no contraints on Intel Atom. There are constraints on Intel Yonah (Core Duo) but they are not provided in this patch given that this processor is not yet supported by perf_events. As a result of the constraints, it is possible for some event groups to never actually be loaded onto the PMU if they contain two events which can only be measured on a single counter. That situation can be detected with the scaling information extracted with read(). Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <1254840129-6198-3-git-send-email-eranian@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1d16bd6..9c75854 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -77,6 +77,18 @@ struct cpu_hw_events { struct debug_store *ds; }; +struct event_constraint { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int code; +}; + +#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } +#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->idxmsk[0]; (e)++) + + /* * struct x86_pmu - generic x86 pmu */ @@ -102,6 +114,7 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); + int (*get_event_idx)(struct hw_perf_event *hwc); }; static struct x86_pmu x86_pmu __read_mostly; @@ -110,6 +123,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; +static const struct event_constraint *event_constraint; + /* * Not sure about some of these */ @@ -155,6 +170,16 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } +static const struct event_constraint intel_p6_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT_END +}; /* * Intel PerfMon v3. Used on Core2 and later. @@ -170,6 +195,35 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; +static const struct event_constraint intel_core_event_constraints[] = +{ + EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static const struct event_constraint intel_nehalem_event_constraints[] = +{ + EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ + EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + static u64 intel_pmu_event_map(int hw_event) { return intel_perfmon_event_map[hw_event]; @@ -932,6 +986,8 @@ static int __hw_perf_event_init(struct perf_event *event) */ hwc->config = ARCH_PERFMON_EVENTSEL_INT; + hwc->idx = -1; + /* * Count user and OS events unless requested not to. */ @@ -1366,6 +1422,45 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) } /* + * generic counter allocator: get next free counter + */ +static int gen_get_event_idx(struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); + return idx == x86_pmu.num_events ? -1 : idx; +} + +/* + * intel-specific counter allocator: check event constraints + */ +static int intel_get_event_idx(struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + const struct event_constraint *event_constraint; + int i, code; + + if (!event_constraint) + goto skip; + + code = hwc->config & 0xff; + + for_each_event_constraint(event_constraint, event_constraint) { + if (code == event_constraint->code) { + for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { + if (!test_and_set_bit(i, cpuc->used_mask)) + return i; + } + return -1; + } + } +skip: + return gen_get_event_idx(hwc); +} + +/* * Find a PMC slot for the freshly enabled / scheduled in event: */ static int x86_pmu_enable(struct perf_event *event) @@ -1402,11 +1497,10 @@ static int x86_pmu_enable(struct perf_event *event) } else { idx = hwc->idx; /* Try to get the previous generic event again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_events); - if (idx == x86_pmu.num_events) + idx = x86_pmu.get_event_idx(hwc); + if (idx == -1) return -EAGAIN; set_bit(idx, cpuc->used_mask); @@ -1883,6 +1977,7 @@ static struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, + .get_event_idx = intel_get_event_idx, }; static struct x86_pmu intel_pmu = { @@ -1906,6 +2001,7 @@ static struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, + .get_event_idx = intel_get_event_idx, }; static struct x86_pmu amd_pmu = { @@ -1926,6 +2022,7 @@ static struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, + .get_event_idx = gen_get_event_idx, }; static int p6_pmu_init(void) @@ -1938,10 +2035,12 @@ static int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ + event_constraint = intel_p6_event_constraints; break; case 9: case 13: /* Pentium M */ + event_constraint = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -2013,12 +2112,14 @@ static int intel_pmu_init(void) sizeof(hw_cache_event_ids)); pr_cont("Core2 events, "); + event_constraint = intel_core_event_constraints; break; default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + event_constraint = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: -- cgit v0.10.2 From fe9081cc9bdabb0be953a39ad977cea14e35bce5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 8 Oct 2009 11:56:07 +0200 Subject: perf, x86: Add simple group validation Refuse to add events when the group wouldn't fit onto the PMU anymore. Naive implementation. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <1254911461.26976.239.camel@twins> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9c75854..9961d845 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -114,7 +114,8 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - int (*get_event_idx)(struct hw_perf_event *hwc); + int (*get_event_idx)(struct cpu_hw_events *cpuc, + struct hw_perf_event *hwc); }; static struct x86_pmu x86_pmu __read_mostly; @@ -523,7 +524,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL #define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ (CORE_EVNTSEL_EVENT_MASK | \ @@ -1390,8 +1391,7 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int -fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) +static int fixed_mode_idx(struct hw_perf_event *hwc) { unsigned int hw_event; @@ -1424,9 +1424,9 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) /* * generic counter allocator: get next free counter */ -static int gen_get_event_idx(struct hw_perf_event *hwc) +static int +gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); @@ -1436,16 +1436,16 @@ static int gen_get_event_idx(struct hw_perf_event *hwc) /* * intel-specific counter allocator: check event constraints */ -static int intel_get_event_idx(struct hw_perf_event *hwc) +static int +intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); const struct event_constraint *event_constraint; int i, code; if (!event_constraint) goto skip; - code = hwc->config & 0xff; + code = hwc->config & CORE_EVNTSEL_EVENT_MASK; for_each_event_constraint(event_constraint, event_constraint) { if (code == event_constraint->code) { @@ -1457,26 +1457,22 @@ static int intel_get_event_idx(struct hw_perf_event *hwc) } } skip: - return gen_get_event_idx(hwc); + return gen_get_event_idx(cpuc, hwc); } -/* - * Find a PMC slot for the freshly enabled / scheduled in event: - */ -static int x86_pmu_enable(struct perf_event *event) +static int +x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; int idx; - idx = fixed_mode_idx(event, hwc); + idx = fixed_mode_idx(hwc); if (idx == X86_PMC_IDX_FIXED_BTS) { /* BTS is already occupied. */ if (test_and_set_bit(idx, cpuc->used_mask)) return -EAGAIN; hwc->config_base = 0; - hwc->event_base = 0; + hwc->event_base = 0; hwc->idx = idx; } else if (idx >= 0) { /* @@ -1499,17 +1495,33 @@ static int x86_pmu_enable(struct perf_event *event) /* Try to get the previous generic event again */ if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { try_generic: - idx = x86_pmu.get_event_idx(hwc); + idx = x86_pmu.get_event_idx(cpuc, hwc); if (idx == -1) return -EAGAIN; set_bit(idx, cpuc->used_mask); hwc->idx = idx; } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; } + return idx; +} + +/* + * Find a PMC slot for the freshly enabled / scheduled in event: + */ +static int x86_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + + idx = x86_schedule_event(cpuc, hwc); + if (idx < 0) + return idx; + perf_events_lapic_init(); x86_pmu.disable(hwc, idx); @@ -2212,11 +2224,47 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; +static int +validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event fake_event = event->hw; + + if (event->pmu != &pmu) + return 0; + + return x86_schedule_event(cpuc, &fake_event); +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct cpu_hw_events fake_pmu; + + memset(&fake_pmu, 0, sizeof(fake_pmu)); + + if (!validate_event(&fake_pmu, leader)) + return -ENOSPC; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(&fake_pmu, sibling)) + return -ENOSPC; + } + + if (!validate_event(&fake_pmu, event)) + return -ENOSPC; + + return 0; +} + const struct pmu *hw_perf_event_init(struct perf_event *event) { int err; err = __hw_perf_event_init(event); + if (!err) { + if (event->group_leader != event) + err = validate_group(event); + } if (err) { if (event->destroy) event->destroy(event); -- cgit v0.10.2 From f3834b9ef68067199486740b31f691afb14dbdf5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Oct 2009 10:12:46 +0200 Subject: x86: Generate cmpxchg build failures Rework the x86 cmpxchg() implementation to generate build failures when used on improper types. Signed-off-by: Peter Zijlstra Acked-by: Linus Torvalds LKML-Reference: <1254771187.21044.22.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 82ceb78..5371174 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -8,14 +8,50 @@ * you need to test for the feature in boot_cpu_data. */ -#define xchg(ptr, v) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(v), (ptr), sizeof(*(ptr)))) +extern void __xchg_wrong_size(void); + +/* + * Note: no "lock" prefix even on SMP: xchg always implies lock anyway + * Note 2: xchg has side effect, so that attribute volatile is necessary, + * but generally the primitive is invalid, *ptr is output argument. --ANK + */ struct __xchg_dummy { unsigned long a[100]; }; #define __xg(x) ((struct __xchg_dummy *)(x)) +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + /* * The semantics of XCHGCMP8B are a bit strange, this is why * there is a loop and the loading of %%eax and %%edx has to @@ -71,57 +107,63 @@ static inline void __set_64bit_var(unsigned long long *ptr, (unsigned int)((value) >> 32)) \ : __set_64bit(ptr, ll_low((value)), ll_high((value)))) -/* - * Note: no "lock" prefix even on SMP: xchg always implies lock anyway - * Note 2: xchg has side effect, so that attribute volatile is necessary, - * but generally the primitive is invalid, *ptr is output argument. --ANK - */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +extern void __cmpxchg_wrong_size(void); /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) + +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) + +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") + +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") #ifdef CONFIG_X86_CMPXCHG #define __HAVE_ARCH_CMPXCHG 1 -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) #endif #ifdef CONFIG_X86_CMPXCHG64 @@ -133,94 +175,6 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, (unsigned long long)(n))) #endif -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} - static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old, unsigned long long new) diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 52de72e..485ae41 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -3,9 +3,6 @@ #include /* Provides LOCK_PREFIX */ -#define xchg(ptr, v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v), \ - (ptr), sizeof(*(ptr)))) - #define __xg(x) ((volatile long *)(x)) static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) @@ -15,167 +12,118 @@ static inline void set_64bit(volatile unsigned long *ptr, unsigned long val) #define _set_64bit set_64bit +extern void __xchg_wrong_size(void); +extern void __cmpxchg_wrong_size(void); + /* * Note: no "lock" prefix even on SMP: xchg always implies lock anyway * Note 2: xchg has side effect, so that attribute volatile is necessary, * but generally the primitive is invalid, *ptr is output argument. --ANK */ -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, - int size) -{ - switch (size) { - case 1: - asm volatile("xchgb %b0,%1" - : "=q" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 2: - asm volatile("xchgw %w0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 4: - asm volatile("xchgl %k0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - case 8: - asm volatile("xchgq %0,%1" - : "=r" (x) - : "m" (*__xg(ptr)), "0" (x) - : "memory"); - break; - } - return x; -} +#define __xchg(x, ptr, size) \ +({ \ + __typeof(*(ptr)) __x = (x); \ + switch (size) { \ + case 1: \ + asm volatile("xchgb %b0,%1" \ + : "=q" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile("xchgw %w0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile("xchgl %k0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile("xchgq %0,%1" \ + : "=r" (__x) \ + : "m" (*__xg(ptr)), "0" (__x) \ + : "memory"); \ + break; \ + default: \ + __xchg_wrong_size(); \ + } \ + __x; \ +}) + +#define xchg(ptr, v) \ + __xchg((v), (ptr), sizeof(*ptr)) + +#define __HAVE_ARCH_CMPXCHG 1 /* * Atomic compare and exchange. Compare OLD with MEM, if identical, * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. */ +#define __raw_cmpxchg(ptr, old, new, size, lock) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + switch (size) { \ + case 1: \ + asm volatile(lock "cmpxchgb %b1,%2" \ + : "=a"(__ret) \ + : "q"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile(lock "cmpxchgw %w1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile(lock "cmpxchgl %k1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile(lock "cmpxchgq %1,%2" \ + : "=a"(__ret) \ + : "r"(__new), "m"(*__xg(ptr)), "0"(__old) \ + : "memory"); \ + break; \ + default: \ + __cmpxchg_wrong_size(); \ + } \ + __ret; \ +}) -#define __HAVE_ARCH_CMPXCHG 1 +#define __cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX) -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile(LOCK_PREFIX "cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __sync_cmpxchg(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "lock; ") -/* - * Always use locked operations when touching memory shared with a - * hypervisor, since the system may be SMP even if the guest kernel - * isn't. - */ -static inline unsigned long __sync_cmpxchg(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("lock; cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("lock; cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("lock; cmpxchgl %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define __cmpxchg_local(ptr, old, new, size) \ + __raw_cmpxchg((ptr), (old), (new), (size), "") -static inline unsigned long __cmpxchg_local(volatile void *ptr, - unsigned long old, - unsigned long new, int size) -{ - unsigned long prev; - switch (size) { - case 1: - asm volatile("cmpxchgb %b1,%2" - : "=a"(prev) - : "q"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 2: - asm volatile("cmpxchgw %w1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 4: - asm volatile("cmpxchgl %k1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - case 8: - asm volatile("cmpxchgq %1,%2" - : "=a"(prev) - : "r"(new), "m"(*__xg(ptr)), "0"(old) - : "memory"); - return prev; - } - return old; -} +#define cmpxchg(ptr, old, new) \ + __cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define sync_cmpxchg(ptr, old, new) \ + __sync_cmpxchg((ptr), (old), (new), sizeof(*ptr)) + +#define cmpxchg_local(ptr, old, new) \ + __cmpxchg_local((ptr), (old), (new), sizeof(*ptr)) -#define cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr)))) #define cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg_local(ptr, o, n) \ - ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) -#define sync_cmpxchg(ptr, o, n) \ - ((__typeof__(*(ptr)))__sync_cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), \ - sizeof(*(ptr)))) + #define cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ -- cgit v0.10.2 From 89eda06837094ce9f34fae269b8773fcfd70f046 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:47 +0900 Subject: LSM: Add security_path_chmod() and security_path_chown(). This patch allows pathname based LSM modules to check chmod()/chown() operations. Since notify_change() does not receive "struct vfsmount *", we add security_path_chmod() and security_path_chown() to the caller of notify_change(). These hooks are used by TOMOYO. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris diff --git a/fs/open.c b/fs/open.c index 4f01e06..b5c294d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -616,6 +616,9 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) err = mnt_want_write_file(file); if (err) goto out_putf; + err = security_path_chmod(dentry, file->f_vfsmnt, mode); + if (err) + goto out_drop_write; mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; @@ -623,6 +626,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); mutex_unlock(&inode->i_mutex); +out_drop_write: mnt_drop_write(file->f_path.mnt); out_putf: fput(file); @@ -645,6 +649,9 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) error = mnt_want_write(path.mnt); if (error) goto dput_and_out; + error = security_path_chmod(path.dentry, path.mnt, mode); + if (error) + goto out_drop_write; mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; @@ -652,6 +659,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(path.dentry, &newattrs); mutex_unlock(&inode->i_mutex); +out_drop_write: mnt_drop_write(path.mnt); dput_and_out: path_put(&path); @@ -700,7 +708,9 @@ SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = security_path_chown(&path, user, group); + if (!error) + error = chown_common(path.dentry, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -725,7 +735,9 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = security_path_chown(&path, user, group); + if (!error) + error = chown_common(path.dentry, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -744,7 +756,9 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(path.dentry, user, group); + error = security_path_chown(&path, user, group); + if (!error) + error = chown_common(path.dentry, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -767,7 +781,9 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) goto out_fput; dentry = file->f_path.dentry; audit_inode(NULL, dentry); - error = chown_common(dentry, user, group); + error = security_path_chown(&file->f_path, user, group); + if (!error) + error = chown_common(dentry, user, group); mnt_drop_write(file->f_path.mnt); out_fput: fput(file); diff --git a/include/linux/security.h b/include/linux/security.h index 239e40d..c8a584c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -447,6 +447,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new_dir contains the path structure for parent of the new link. * @new_dentry contains the dentry structure of the new link. * Return 0 if permission is granted. + * @path_chmod: + * Check for permission to change DAC's permission of a file or directory. + * @dentry contains the dentry structure. + * @mnt contains the vfsmnt structure. + * @mode contains DAC's mode. + * Return 0 if permission is granted. + * @path_chown: + * Check for permission to change owner/group of a file or directory. + * @path contains the path structure. + * @uid contains new owner's ID. + * @gid contains new group's ID. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -1488,6 +1500,9 @@ struct security_operations { struct dentry *new_dentry); int (*path_rename) (struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); + int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); + int (*path_chown) (struct path *path, uid_t uid, gid_t gid); #endif int (*inode_alloc_security) (struct inode *inode); @@ -2952,6 +2967,9 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); +int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode); +int security_path_chown(struct path *path, uid_t uid, gid_t gid); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3001,6 +3019,18 @@ static inline int security_path_rename(struct path *old_dir, { return 0; } + +static inline int security_path_chmod(struct dentry *dentry, + struct vfsmount *mnt, + mode_t mode) +{ + return 0; +} + +static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS diff --git a/security/capability.c b/security/capability.c index fce07a7..09279a8 100644 --- a/security/capability.c +++ b/security/capability.c @@ -308,6 +308,17 @@ static int cap_path_truncate(struct path *path, loff_t length, { return 0; } + +static int cap_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode) +{ + return 0; +} + +static int cap_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + return 0; +} #endif static int cap_file_permission(struct file *file, int mask) @@ -977,6 +988,8 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, path_link); set_to_cap_if_null(ops, path_rename); set_to_cap_if_null(ops, path_truncate); + set_to_cap_if_null(ops, path_chmod); + set_to_cap_if_null(ops, path_chown); #endif set_to_cap_if_null(ops, file_permission); set_to_cap_if_null(ops, file_alloc_security); diff --git a/security/security.c b/security/security.c index c4c6732..5259270 100644 --- a/security/security.c +++ b/security/security.c @@ -434,6 +434,21 @@ int security_path_truncate(struct path *path, loff_t length, return 0; return security_ops->path_truncate(path, length, time_attrs); } + +int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, + mode_t mode) +{ + if (unlikely(IS_PRIVATE(dentry->d_inode))) + return 0; + return security_ops->path_chmod(dentry, mnt, mode); +} + +int security_path_chown(struct path *path, uid_t uid, gid_t gid) +{ + if (unlikely(IS_PRIVATE(path->dentry->d_inode))) + return 0; + return security_ops->path_chown(path, uid, gid); +} #endif int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) -- cgit v0.10.2 From 8b8efb44033c7e86b3dc76f825c693ec92ae30e9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:48 +0900 Subject: LSM: Add security_path_chroot(). This patch allows pathname based LSM modules to check chroot() operations. This hook is used by TOMOYO. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris diff --git a/fs/open.c b/fs/open.c index b5c294d..201041d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -587,6 +587,9 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) error = -EPERM; if (!capable(CAP_SYS_CHROOT)) goto dput_and_out; + error = security_path_chroot(&path); + if (error) + goto dput_and_out; set_fs_root(current->fs, &path); error = 0; diff --git a/include/linux/security.h b/include/linux/security.h index c8a584c..ed0faea 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -459,6 +459,10 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @uid contains new owner's ID. * @gid contains new group's ID. * Return 0 if permission is granted. + * @path_chroot: + * Check for permission to change root directory. + * @path contains the path structure. + * Return 0 if permission is granted. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. @@ -1503,6 +1507,7 @@ struct security_operations { int (*path_chmod) (struct dentry *dentry, struct vfsmount *mnt, mode_t mode); int (*path_chown) (struct path *path, uid_t uid, gid_t gid); + int (*path_chroot) (struct path *path); #endif int (*inode_alloc_security) (struct inode *inode); @@ -2970,6 +2975,7 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry, int security_path_chmod(struct dentry *dentry, struct vfsmount *mnt, mode_t mode); int security_path_chown(struct path *path, uid_t uid, gid_t gid); +int security_path_chroot(struct path *path); #else /* CONFIG_SECURITY_PATH */ static inline int security_path_unlink(struct path *dir, struct dentry *dentry) { @@ -3031,6 +3037,11 @@ static inline int security_path_chown(struct path *path, uid_t uid, gid_t gid) { return 0; } + +static inline int security_path_chroot(struct path *path) +{ + return 0; +} #endif /* CONFIG_SECURITY_PATH */ #ifdef CONFIG_KEYS diff --git a/security/capability.c b/security/capability.c index 09279a8..4f3ab47 100644 --- a/security/capability.c +++ b/security/capability.c @@ -319,6 +319,11 @@ static int cap_path_chown(struct path *path, uid_t uid, gid_t gid) { return 0; } + +static int cap_path_chroot(struct path *root) +{ + return 0; +} #endif static int cap_file_permission(struct file *file, int mask) @@ -990,6 +995,7 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, path_truncate); set_to_cap_if_null(ops, path_chmod); set_to_cap_if_null(ops, path_chown); + set_to_cap_if_null(ops, path_chroot); #endif set_to_cap_if_null(ops, file_permission); set_to_cap_if_null(ops, file_alloc_security); diff --git a/security/security.c b/security/security.c index 5259270..2797573 100644 --- a/security/security.c +++ b/security/security.c @@ -449,6 +449,11 @@ int security_path_chown(struct path *path, uid_t uid, gid_t gid) return 0; return security_ops->path_chown(path, uid, gid); } + +int security_path_chroot(struct path *path) +{ + return security_ops->path_chroot(path); +} #endif int security_inode_create(struct inode *dir, struct dentry *dentry, int mode) -- cgit v0.10.2 From a27ab9f26b729326778271c1efd895aef4fda1c4 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 4 Oct 2009 21:49:49 +0900 Subject: LSM: Pass original mount flags to security_sb_mount(). This patch allows LSM modules to determine based on original mount flags passed to mount(). A LSM module can get masked mount flags (if needed) by flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); Signed-off-by: Tetsuo Handa Signed-off-by: James Morris diff --git a/fs/namespace.c b/fs/namespace.c index bdc3cb4..7d70d63 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1921,6 +1921,16 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, if (data_page) ((char *)data_page)[PAGE_SIZE - 1] = 0; + /* ... and get the mountpoint */ + retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); + if (retval) + return retval; + + retval = security_sb_mount(dev_name, &path, + type_page, flags, data_page); + if (retval) + goto dput_out; + /* Default to relatime unless overriden */ if (!(flags & MS_NOATIME)) mnt_flags |= MNT_RELATIME; @@ -1945,16 +1955,6 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | MS_STRICTATIME); - /* ... and get the mountpoint */ - retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); - if (retval) - return retval; - - retval = security_sb_mount(dev_name, &path, - type_page, flags, data_page); - if (retval) - goto dput_out; - if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, data_page); -- cgit v0.10.2 From 3bb258bf430d29a24350fe4f44f8bf07b7b7a8f6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 4 Oct 2009 17:53:29 -0700 Subject: ftrace.c: Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - Remove prefixes from pr_, use pr_fmt(fmt). No change in output. Signed-off-by: Joe Perches Acked-by: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <9b377eefae9e28c599dd4a17bdc81172965e9931.1254701151.git.joe@perches.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527..25e6f5f 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -9,6 +9,8 @@ * the dangers of modifying code on the run. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -336,15 +338,15 @@ int __init ftrace_dyn_arch_init(void *data) switch (faulted) { case 0: - pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); + pr_info("converting mcount calls to 0f 1f 44 00 00\n"); memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); break; case 1: - pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); + pr_info("converting mcount calls to 66 66 66 66 90\n"); memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); break; case 2: - pr_info("ftrace: converting mcount calls to jmp . + 5\n"); + pr_info("converting mcount calls to jmp . + 5\n"); memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); break; } -- cgit v0.10.2 From 3c355863fb32070a2800f41106519c5c3038623a Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 4 Oct 2009 17:53:40 -0700 Subject: testmmiotrace.c: Add and use pr_fmt(fmt) - Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt. - Strip MODULE_NAME from pr_s. - Remove MODULE_NAME definition. Signed-off-by: Joe Perches LKML-Reference: <3bb66cc7f85f77b9416902e1be7076f7e3f4ad48.1254701151.git.joe@perches.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 427fd1b..8565d94 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -1,12 +1,13 @@ /* * Written by Pekka Paalanen, 2008-2009 */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include -#define MODULE_NAME "testmmiotrace" - static unsigned long mmio_address; module_param(mmio_address, ulong, 0); MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " @@ -30,7 +31,7 @@ static unsigned v32(unsigned i) static void do_write_test(void __iomem *p) { unsigned int i; - pr_info(MODULE_NAME ": write test.\n"); + pr_info("write test.\n"); mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) @@ -47,7 +48,7 @@ static void do_read_test(void __iomem *p) { unsigned int i; unsigned errs[3] = { 0 }; - pr_info(MODULE_NAME ": read test.\n"); + pr_info("read test.\n"); mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) @@ -68,7 +69,7 @@ static void do_read_test(void __iomem *p) static void do_read_far_test(void __iomem *p) { - pr_info(MODULE_NAME ": read far test.\n"); + pr_info("read far test.\n"); mmiotrace_printk("Read far test.\n"); ioread32(p + read_far); @@ -78,7 +79,7 @@ static void do_test(unsigned long size) { void __iomem *p = ioremap_nocache(mmio_address, size); if (!p) { - pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); + pr_err("could not ioremap, aborting.\n"); return; } mmiotrace_printk("ioremap returned %p.\n", p); @@ -94,24 +95,22 @@ static int __init init(void) unsigned long size = (read_far) ? (8 << 20) : (16 << 10); if (mmio_address == 0) { - pr_err(MODULE_NAME ": you have to use the module argument " - "mmio_address.\n"); - pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" - " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); + pr_err("you have to use the module argument mmio_address.\n"); + pr_err("DO NOT LOAD THIS MODULE UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!\n"); return -ENXIO; } - pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " - "address space, and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); + pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " + "and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); do_test(size); - pr_info(MODULE_NAME ": All done.\n"); + pr_info("All done.\n"); return 0; } static void __exit cleanup(void) { - pr_debug(MODULE_NAME ": unloaded.\n"); + pr_debug("unloaded.\n"); } module_init(init); -- cgit v0.10.2 From 7e4ff9e3e8f88de8a8536f43294cd32b4e7d9123 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 12 Oct 2009 07:56:03 +0200 Subject: perf tools: Fix counter sample frequency breakage Commit 42e59d7d19dc4b4 switched to a default sample frequency of 1KHz, which overrides any user supplied count, causing sched, top and timechart to miss events due to their discrete events being flagged PERF_SAMPLE_PERIOD. Override default sample frequency when the user profides a period count, and make both record and top honor that user supplied option. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Arjan van de Ven LKML-Reference: <1255326963.15107.2.camel@marge.simson.net> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 59af03d..4e3a374 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -26,7 +26,7 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS]; -static long default_interval = 100000; +static long default_interval = 0; static int nr_cpus = 0; static unsigned int page_size; @@ -730,6 +730,18 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) attrs[0].config = PERF_COUNT_HW_CPU_CYCLES; } + /* + * User specified count overrides default frequency. + */ + if (default_interval) + freq = 0; + else if (freq) { + default_interval = freq; + } else { + fprintf(stderr, "frequency and count are zero, aborting\n"); + exit(EXIT_FAILURE); + } + for (counter = 0; counter < nr_counters; counter++) { if (attrs[counter].sample_period) continue; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index d978dc9..c0f69e8 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -57,7 +57,7 @@ static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static int system_wide = 0; -static int default_interval = 100000; +static int default_interval = 0; static int count_filter = 5; static int print_entries = 15; @@ -975,7 +975,13 @@ static void start_counter(int i, int counter) attr = attrs + counter; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; - attr->freq = freq; + + if (freq) { + attr->sample_type |= PERF_SAMPLE_PERIOD; + attr->freq = 1; + attr->sample_freq = freq; + } + attr->inherit = (cpu < 0) && inherit; try_again: @@ -1130,11 +1136,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(top_usage, options); - if (freq) { - default_interval = freq; - freq = 1; - } - /* CPU and PID are mutually exclusive */ if (target_pid != -1 && profile_cpu != -1) { printf("WARNING: PID switch overriding CPU\n"); @@ -1151,6 +1152,19 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) parse_symbols(); parse_source(sym_filter_entry); + + /* + * User specified count overrides default frequency. + */ + if (default_interval) + freq = 0; + else if (freq) { + default_interval = freq; + } else { + fprintf(stderr, "frequency and count are zero, aborting\n"); + exit(EXIT_FAILURE); + } + /* * Fill in the ones not specifically initialized via -c: */ -- cgit v0.10.2 From 55ffb7a6bd45d0083ffb132381cb46964a4afe01 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Sat, 10 Oct 2009 14:46:04 +0200 Subject: perf sched: Add -C option to measure on a specific CPU To refresh, trying to sched record only one CPU results in bogus latencies as below. I fixed^Wmade it stop doing the bad thing today, by following task migration events properly. Before: marge:/root/tmp # taskset -c 1 perf sched record -C 0 -- sleep 10 marge:/root/tmp # perf sched lat ----------------------------------------------------------------------------------------- Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | ----------------------------------------------------------------------------------------- Xorg:4943 | 1.290 ms | 1 | avg: 1670.132 ms | max: 1670.132 ms | hald-addon-stor:3569 | 0.091 ms | 3 | avg: 658.609 ms | max: 1975.797 ms | hald-addon-stor:3573 | 0.209 ms | 4 | avg: 499.138 ms | max: 1990.565 ms | audispd:4270 | 0.012 ms | 1 | avg: 0.015 ms | max: 0.015 ms | .... marge:/root/tmp # perf sched trace|grep 'Xorg:4943' swapper-0 [000] 401.184013288: sched_stat_runtime: task: Xorg:4943 runtime: 1233188 [ns], vruntime: 19105169779 [ns] rt2870TimerQHan-4947 [000] 402.854140127: sched_stat_wait: task: Xorg:4943 wait: 580073 [ns] rt2870TimerQHan-4947 [000] 402.854141770: sched_migrate_task: task Xorg:4943 [140] from: 1 to: 0 rt2870TimerQHan-4947 [000] 402.854143854: sched_stat_wait: task: Xorg:4943 wait: 0 [ns] rt2870TimerQHan-4947 [000] 402.854145397: sched_switch: task rt2870TimerQHan:4947 [140] (D) ==> Xorg:4943 [140] Xorg-4943 [000] 402.854193133: sched_stat_runtime: task: Xorg:4943 runtime: 56546 [ns], vruntime: 11766332500 [ns] Xorg-4943 [000] 402.854196842: sched_switch: task Xorg:4943 [140] (S) ==> swapper:0 [140] After: marge:/root/tmp # taskset -c 1 perf sched record -C 0 -- sleep 10 marge:/root/tmp # perf sched lat ----------------------------------------------------------------------------------------- Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | ----------------------------------------------------------------------------------------- amarokapp:11150 | 271.297 ms | 878 | avg: 0.130 ms | max: 1.057 ms | konsole:5965 | 1.370 ms | 12 | avg: 0.092 ms | max: 0.855 ms | Xorg:4943 | 179.980 ms | 1109 | avg: 0.087 ms | max: 1.206 ms | hald-addon-stor:3574 | 0.212 ms | 9 | avg: 0.040 ms | max: 0.169 ms | hald-addon-stor:3570 | 0.223 ms | 9 | avg: 0.037 ms | max: 0.223 ms | klauncher:5864 | 0.550 ms | 8 | avg: 0.032 ms | max: 0.048 ms | The 'Maximum delay ms' results are now sane. Signed-off-by: Mike Galbraith LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 6b00529..387a442 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -33,6 +33,8 @@ static u64 sample_type; static char default_sort_order[] = "avg, max, switch, runtime"; static char *sort_order = default_sort_order; +static int profile_cpu = -1; + static char *cwd; static int cwdlen; @@ -75,6 +77,7 @@ enum sched_event_type { SCHED_EVENT_RUN, SCHED_EVENT_SLEEP, SCHED_EVENT_WAKEUP, + SCHED_EVENT_MIGRATION, }; struct sched_atom { @@ -399,6 +402,8 @@ process_sched_event(struct task_desc *this_task __used, struct sched_atom *atom) ret = sem_post(atom->wait_sem); BUG_ON(ret); break; + case SCHED_EVENT_MIGRATION: + break; default: BUG_ON(1); } @@ -746,6 +751,22 @@ struct trace_fork_event { u32 child_pid; }; +struct trace_migrate_task_event { + u32 size; + + u16 common_type; + u8 common_flags; + u8 common_preempt_count; + u32 common_pid; + u32 common_tgid; + + char comm[16]; + u32 pid; + + u32 prio; + u32 cpu; +}; + struct trace_sched_handler { void (*switch_event)(struct trace_switch_event *, struct event *, @@ -770,6 +791,12 @@ struct trace_sched_handler { int cpu, u64 timestamp, struct thread *thread); + + void (*migrate_task_event)(struct trace_migrate_task_event *, + struct event *, + int cpu, + u64 timestamp, + struct thread *thread); }; @@ -1140,7 +1167,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, atom = list_entry(atoms->work_list.prev, struct work_atom, list); - if (atom->state != THREAD_SLEEPING) + /* + * You WILL be missing events if you've recorded only + * one CPU, or are only looking at only one, so don't + * make useless noise. + */ + if (profile_cpu == -1 && atom->state != THREAD_SLEEPING) nr_state_machine_bugs++; nr_timestamps++; @@ -1153,11 +1185,51 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, atom->wake_up_time = timestamp; } +static void +latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, + struct event *__event __used, + int cpu __used, + u64 timestamp, + struct thread *thread __used) +{ + struct work_atoms *atoms; + struct work_atom *atom; + struct thread *migrant; + + /* + * Only need to worry about migration when profiling one CPU. + */ + if (profile_cpu == -1) + return; + + migrant = threads__findnew(migrate_task_event->pid, &threads, &last_match); + atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); + if (!atoms) { + thread_atoms_insert(migrant); + register_pid(migrant->pid, migrant->comm); + atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); + if (!atoms) + die("migration-event: Internal tree error"); + add_sched_out_event(atoms, 'R', timestamp); + } + + BUG_ON(list_empty(&atoms->work_list)); + + atom = list_entry(atoms->work_list.prev, struct work_atom, list); + atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp; + + nr_timestamps++; + + if (atom->sched_out_time > timestamp) + nr_unordered_timestamps++; +} + static struct trace_sched_handler lat_ops = { .wakeup_event = latency_wakeup_event, .switch_event = latency_switch_event, .runtime_event = latency_runtime_event, .fork_event = latency_fork_event, + .migrate_task_event = latency_migrate_task_event, }; static void output_lat_thread(struct work_atoms *work_list) @@ -1518,6 +1590,26 @@ process_sched_exit_event(struct event *event, } static void +process_sched_migrate_task_event(struct raw_event_sample *raw, + struct event *event, + int cpu __used, + u64 timestamp __used, + struct thread *thread __used) +{ + struct trace_migrate_task_event migrate_task_event; + + FILL_COMMON_FIELDS(migrate_task_event, event, raw->data); + + FILL_ARRAY(migrate_task_event, comm, event, raw->data); + FILL_FIELD(migrate_task_event, pid, event, raw->data); + FILL_FIELD(migrate_task_event, prio, event, raw->data); + FILL_FIELD(migrate_task_event, cpu, event, raw->data); + + if (trace_handler->migrate_task_event) + trace_handler->migrate_task_event(&migrate_task_event, event, cpu, timestamp, thread); +} + +static void process_raw_event(event_t *raw_event __used, void *more_data, int cpu, u64 timestamp, struct thread *thread) { @@ -1540,6 +1632,8 @@ process_raw_event(event_t *raw_event __used, void *more_data, process_sched_fork_event(raw, event, cpu, timestamp, thread); if (!strcmp(event->name, "sched_process_exit")) process_sched_exit_event(event, cpu, timestamp, thread); + if (!strcmp(event->name, "sched_migrate_task")) + process_sched_migrate_task_event(raw, event, cpu, timestamp, thread); } static int @@ -1589,6 +1683,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } + if (profile_cpu != -1 && profile_cpu != (int) cpu) + return 0; + process_raw_event(event, more_data, cpu, timestamp, thread); return 0; @@ -1771,6 +1868,8 @@ static const struct option latency_options[] = { "sort by key(s): runtime, switch, avg, max"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_INTEGER('C', "CPU", &profile_cpu, + "CPU to profile on"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_END() -- cgit v0.10.2 From fb2531953fd8855abdcf458459020fd382c5deca Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 7 Oct 2009 13:20:38 +0200 Subject: mce, edac: Use an atomic notifier for MCEs decoding Add an atomic notifier which ensures proper locking when conveying MCE info to EDAC for decoding. The actual notifier call overrides a default, negative priority notifier. Note: make sure we register the default decoder only once since mcheck_init() runs on each CPU. Signed-off-by: Borislav Petkov LKML-Reference: <20091003065752.GA8935@liondog.tnic> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b7..227a72d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -108,6 +108,8 @@ struct mce_log { #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) +extern struct atomic_notifier_head x86_mce_decoder_chain; + #ifdef __KERNEL__ #include @@ -213,6 +215,5 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); - #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9..15ba9c9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -85,18 +85,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; -static void default_decode_mce(struct mce *m) +/* + * CPU/chipset specific EDAC code can register a notifier call here to print + * MCE errors in a human-readable form. + */ +ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); + +static int default_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) { pr_emerg("No human readable MCE decoding support on this CPU type.\n"); pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); + + return NOTIFY_STOP; } -/* - * CPU/chipset specific EDAC code can register a callback here to print - * MCE errors in a human-readable form: - */ -void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; -EXPORT_SYMBOL(x86_mce_decode_callback); +static struct notifier_block mce_dec_nb = { + .notifier_call = default_decode_mce, + .priority = -1, +}; /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -204,9 +212,9 @@ static void print_mce(struct mce *m) /* * Print out human-readable details about the MCE error, - * (if the CPU has an implementation for that): + * (if the CPU has an implementation for that) */ - x86_mce_decode_callback(m); + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); } static void print_mce_head(void) @@ -1420,6 +1428,9 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) mce_cpu_features(c); mce_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); + + if (raw_smp_processor_id() == 0) + atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); } /* diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 713ed7d..689cc6a 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -3,7 +3,6 @@ static bool report_gart_errors; static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); -static void (*orig_mce_callback)(struct mce *m); void amd_report_gart_errors(bool v) { @@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec) pr_warning("Huh? Unknown MCE error 0x%x\n", ec); } -static void amd_decode_mce(struct mce *m) +static int amd_decode_mce(struct notifier_block *nb, unsigned long val, + void *data) { + struct mce *m = (struct mce *)data; struct err_regs regs; int node, ecc; @@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m) } amd_decode_err_code(m->status & 0xffff); + + return NOTIFY_STOP; } +static struct notifier_block amd_mce_dec_nb = { + .notifier_call = amd_decode_mce, +}; + static int __init mce_amd_init(void) { /* * We can decode MCEs for Opteron and later CPUs: */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && - (boot_cpu_data.x86 >= 0xf)) { - /* safe the default decode mce callback */ - orig_mce_callback = x86_mce_decode_callback; - - x86_mce_decode_callback = amd_decode_mce; - } + (boot_cpu_data.x86 >= 0xf)) + atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); return 0; } @@ -442,7 +445,7 @@ early_initcall(mce_amd_init); #ifdef MODULE static void __exit mce_amd_exit(void) { - x86_mce_decode_callback = orig_mce_callback; + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); } MODULE_DESCRIPTION("AMD MCE decoder"); -- cgit v0.10.2 From ae24ffe5ecec17c956ac25371d7c2e12b4b36e53 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 12 Oct 2009 10:18:23 -0400 Subject: x86, 64-bit: Move K8 B step iret fixup to fault entry asm Move the handling of truncated %rip from an iret fault to the fault entry path. This allows x86-64 to use the standard search_extable() function. Signed-off-by: Brian Gerst Cc: Linus Torvalds Cc: Jan Beulich LKML-Reference: <1255357103-5418-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d2c6c93..abd3e0e 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -570,7 +570,6 @@ extern struct movsl_mask { #ifdef CONFIG_X86_32 # include "uaccess_32.h" #else -# define ARCH_HAS_SEARCH_EXTABLE # include "uaccess_64.h" #endif diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f..af0f4b22 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1491,12 +1491,17 @@ error_kernelspace: leaq irq_return(%rip),%rcx cmpq %rcx,RIP+8(%rsp) je error_swapgs - movl %ecx,%ecx /* zero extend */ - cmpq %rcx,RIP+8(%rsp) - je error_swapgs + movl %ecx,%eax /* zero extend */ + cmpq %rax,RIP+8(%rsp) + je bstep_iret cmpq $gs_change,RIP+8(%rsp) je error_swapgs jmp error_sti + +bstep_iret: + /* Fix truncated RIP */ + movq %rcx,RIP+8(%rsp) + je error_swapgs END(error_entry) diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61b41ca..d0474ad 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -35,34 +35,3 @@ int fixup_exception(struct pt_regs *regs) return 0; } - -#ifdef CONFIG_X86_64 -/* - * Need to defined our own search_extable on X86_64 to work around - * a B stepping K8 bug. - */ -const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, - unsigned long value) -{ - /* B stepping K8 bug */ - if ((value >> 32) == 0) - value |= 0xffffffffUL << 32; - - while (first <= last) { - const struct exception_table_entry *mid; - long diff; - - mid = (last - first) / 2 + first; - diff = mid->insn - value; - if (diff == 0) - return mid; - else if (diff < 0) - first = mid+1; - else - last = mid-1; - } - return NULL; -} -#endif -- cgit v0.10.2 From 405b2651e4bedf8d3932b64cad649b4d26b067f5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 7 Oct 2009 18:27:40 -0400 Subject: tracing/kprobes: Add $ prefix to special variables Add $ prefix to the special variables(e.g. sa, rv) of kprobe-tracer. This resolves consistency issues between kprobe_events and perf-kprobe. The main goal is to avoid conflicts between local variable names of probed functions, used by perf probe, and special variables used in the kprobe event creation interface (stack values, etc...) and also available from perf probe. ie: we don't want rv (return value) to conflict with a local variable named rv in a probed function. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Frank Ch. Eigler LKML-Reference: <20091007222740.1684.91170.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 9b8f7c6..33f5318 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -36,13 +36,13 @@ Synopsis of kprobe_events FETCHARGS : Arguments. Each probe can have up to 128 args. %REG : Fetch register REG - sN : Fetch Nth entry of stack (N >= 0) - sa : Fetch stack address. @ADDR : Fetch memory at ADDR (ADDR should be in kernel) @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) - aN : Fetch function argument. (N >= 0)(*) - rv : Fetch return value.(**) - ra : Fetch return address.(**) + $sN : Fetch Nth entry of stack (N >= 0) + $sa : Fetch stack address. + $aN : Fetch function argument. (N >= 0)(*) + $rv : Fetch return value.(**) + $ra : Fetch return address.(**) +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) NAME=FETCHARG: Set NAME as the argument name of FETCHARG. @@ -85,13 +85,13 @@ Usage examples To add a probe as a new event, write a new definition to kprobe_events as below. - echo p:myprobe do_sys_open dfd=a0 filename=a1 flags=a2 mode=a3 > /sys/kernel/debug/tracing/kprobe_events + echo p:myprobe do_sys_open dfd=$a0 filename=$a1 flags=$a2 mode=$a3 > /sys/kernel/debug/tracing/kprobe_events This sets a kprobe on the top of do_sys_open() function with recording 1st to 4th arguments as "myprobe" event. As this example shows, users can choose more familiar names for each arguments. - echo r:myretprobe do_sys_open rv ra >> /sys/kernel/debug/tracing/kprobe_events + echo r:myretprobe do_sys_open $rv $ra >> /sys/kernel/debug/tracing/kprobe_events This sets a kretprobe on the return point of do_sys_open() function with recording return value and return address as "myretprobe" event. @@ -138,11 +138,11 @@ events, you need to enable it. # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 - <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) rv=fffffffffffffffe ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $rv=fffffffffffffffe $ra=ffffffff81367a3a <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 - <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=3 ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $rv=3 $ra=ffffffff81367a3a <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 - <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) rv=3 ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $rv=3 $ra=ffffffff81367a3a Each line shows when the kernel hits an event, and <- SYMBOL means kernel diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 97309d4..f63ead0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -220,24 +220,24 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) int ret = -EINVAL; if (ff->func == fetch_argument) - ret = snprintf(buf, n, "a%lu", (unsigned long)ff->data); + ret = snprintf(buf, n, "$a%lu", (unsigned long)ff->data); else if (ff->func == fetch_register) { const char *name; name = regs_query_register_name((unsigned int)((long)ff->data)); ret = snprintf(buf, n, "%%%s", name); } else if (ff->func == fetch_stack) - ret = snprintf(buf, n, "s%lu", (unsigned long)ff->data); + ret = snprintf(buf, n, "$s%lu", (unsigned long)ff->data); else if (ff->func == fetch_memory) ret = snprintf(buf, n, "@0x%p", ff->data); else if (ff->func == fetch_symbol) { struct symbol_cache *sc = ff->data; ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset); } else if (ff->func == fetch_retvalue) - ret = snprintf(buf, n, "rv"); + ret = snprintf(buf, n, "$rv"); else if (ff->func == fetch_ip) - ret = snprintf(buf, n, "ra"); + ret = snprintf(buf, n, "$ra"); else if (ff->func == fetch_stack_address) - ret = snprintf(buf, n, "sa"); + ret = snprintf(buf, n, "$sa"); else if (ff->func == fetch_indirect) { struct indirect_fetch_data *id = ff->data; size_t l = 0; @@ -429,12 +429,10 @@ static int split_symbol_offset(char *symbol, unsigned long *offset) #define PARAM_MAX_ARGS 16 #define PARAM_MAX_STACK (THREAD_SIZE / sizeof(unsigned long)) -static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) { int ret = 0; unsigned long param; - long offset; - char *tmp; switch (arg[0]) { case 'a': /* argument */ @@ -456,14 +454,6 @@ static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) } else ret = -EINVAL; break; - case '%': /* named register */ - ret = regs_query_register_offset(arg + 1); - if (ret >= 0) { - ff->func = fetch_register; - ff->data = (void *)(unsigned long)ret; - ret = 0; - } - break; case 's': /* stack */ if (arg[1] == 'a') { ff->func = fetch_stack_address; @@ -478,6 +468,31 @@ static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) } } break; + default: + ret = -EINVAL; + } + return ret; +} + +static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +{ + int ret = 0; + unsigned long param; + long offset; + char *tmp; + + switch (arg[0]) { + case '$': + ret = parse_probe_vars(arg + 1, ff, is_return); + break; + case '%': /* named register */ + ret = regs_query_register_offset(arg + 1); + if (ret >= 0) { + ff->func = fetch_register; + ff->data = (void *)(unsigned long)ret; + ret = 0; + } + break; case '@': /* memory or symbol */ if (isdigit(arg[1])) { ret = strict_strtoul(arg + 1, 0, ¶m); @@ -489,8 +504,7 @@ static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) ret = split_symbol_offset(arg + 1, &offset); if (ret) break; - ff->data = alloc_symbol_cache(arg + 1, - offset); + ff->data = alloc_symbol_cache(arg + 1, offset); if (ff->data) ff->func = fetch_symbol; else @@ -544,11 +558,11 @@ static int create_trace_probe(int argc, char **argv) * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] * Fetch args: - * aN : fetch Nth of function argument. (N:0-) - * rv : fetch return value - * ra : fetch return address - * sa : fetch stack address - * sN : fetch Nth of stack (N:0-) + * $aN : fetch Nth of function argument. (N:0-) + * $rv : fetch return value + * $ra : fetch return address + * $sa : fetch stack address + * $sN : fetch Nth of stack (N:0-) * @ADDR : fetch memory at ADDR (ADDR should be in kernel) * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) * %REG : fetch register REG -- cgit v0.10.2 From 99329c44f28a1b7ac83beebfb4319e612042e319 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 7 Oct 2009 18:27:48 -0400 Subject: tracing/kprobes: Remove '$ra' special variable Remove '$ra' (return address) because it is already shown at the head of each entry. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Frank Ch. Eigler LKML-Reference: <20091007222748.1684.12711.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 33f5318..4208253 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -1,4 +1,4 @@ - Kprobe-based Event Tracer + Kprobe-based Event Tracer ========================= Documentation is written by Masami Hiramatsu @@ -42,7 +42,6 @@ Synopsis of kprobe_events $sa : Fetch stack address. $aN : Fetch function argument. (N >= 0)(*) $rv : Fetch return value.(**) - $ra : Fetch return address.(**) +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) NAME=FETCHARG: Set NAME as the argument name of FETCHARG. @@ -91,10 +90,10 @@ as below. 1st to 4th arguments as "myprobe" event. As this example shows, users can choose more familiar names for each arguments. - echo r:myretprobe do_sys_open $rv $ra >> /sys/kernel/debug/tracing/kprobe_events + echo r:myretprobe do_sys_open $rv >> /sys/kernel/debug/tracing/kprobe_events This sets a kretprobe on the return point of do_sys_open() function with -recording return value and return address as "myretprobe" event. +recording return value as "myretprobe" event. You can see the format of these events via /sys/kernel/debug/tracing/events/kprobes//format. @@ -138,11 +137,11 @@ events, you need to enable it. # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 - <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $rv=fffffffffffffffe $ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $rv=fffffffffffffffe <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 - <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $rv=3 $ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $rv=3 <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 - <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $rv=3 $ra=ffffffff81367a3a + <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $rv=3 Each line shows when the kernel hits an event, and <- SYMBOL means kernel diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index f63ead0..ba6d3bd4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -85,11 +85,6 @@ static __kprobes unsigned long fetch_retvalue(struct pt_regs *regs, return regs_return_value(regs); } -static __kprobes unsigned long fetch_ip(struct pt_regs *regs, void *dummy) -{ - return instruction_pointer(regs); -} - static __kprobes unsigned long fetch_stack_address(struct pt_regs *regs, void *dummy) { @@ -234,8 +229,6 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset); } else if (ff->func == fetch_retvalue) ret = snprintf(buf, n, "$rv"); - else if (ff->func == fetch_ip) - ret = snprintf(buf, n, "$ra"); else if (ff->func == fetch_stack_address) ret = snprintf(buf, n, "$sa"); else if (ff->func == fetch_indirect) { @@ -448,9 +441,6 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) if (is_return && arg[1] == 'v') { ff->func = fetch_retvalue; ff->data = NULL; - } else if (is_return && arg[1] == 'a') { - ff->func = fetch_ip; - ff->data = NULL; } else ret = -EINVAL; break; @@ -560,7 +550,6 @@ static int create_trace_probe(int argc, char **argv) * Fetch args: * $aN : fetch Nth of function argument. (N:0-) * $rv : fetch return value - * $ra : fetch return address * $sa : fetch stack address * $sN : fetch Nth of stack (N:0-) * @ADDR : fetch memory at ADDR (ADDR should be in kernel) -- cgit v0.10.2 From 369bc18f9a6c4e2686204c1d7476ab684a720968 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Mon, 12 Oct 2009 22:17:21 +0200 Subject: ftrace: add kernel command line graph function filtering Add a command line parameter to allow limiting the function graphs that are traced on boot up from the given top-level callers , when ftrace=function_graph is specified. This patch adds the following command line option: ftrace_graph_filter=function-list Where function-list is a comma separated list of functions to filter. [fweisbec@gmail.com: picked the documentation changes from the v2 patch] Signed-off-by: Stefan Assmann Acked-by: Steven Rostedt LKML-Reference: <4AD2DEB9.2@redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6fa7292..1dc4b9c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -778,6 +778,13 @@ and is between 256 and 4096 characters. It is defined in the file by the set_ftrace_notrace file in the debugfs tracing directory. + ftrace_graph_filter=[function-list] + [FTRACE] Limit the top level callers functions traced + by the function graph tracer at boot up. + function-list is a comma separated list of functions + that can be changed at run time by the + set_graph_function file in the debugfs tracing directory. + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9a72853..91283d4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -78,6 +78,10 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); +#endif + static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op = ftrace_list; @@ -2248,6 +2252,7 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset) #define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; +static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; static int __init set_ftrace_notrace(char *str) { @@ -2263,6 +2268,31 @@ static int __init set_ftrace_filter(char *str) } __setup("ftrace_filter=", set_ftrace_filter); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static int __init set_graph_function(char *str) +{ + strncpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); + return 1; +} +__setup("ftrace_graph_filter=", set_graph_function); + +static void __init set_ftrace_early_graph(char *buf) +{ + int ret; + char *func; + + while (buf) { + func = strsep(&buf, ","); + /* we allow only one expression at a time */ + ret = ftrace_set_func(ftrace_graph_funcs, &ftrace_graph_count, + func); + if (ret) + printk(KERN_DEBUG "ftrace: function %s not " + "traceable\n", func); + } +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static void __init set_ftrace_early_filter(char *buf, int enable) { char *func; @@ -2279,6 +2309,10 @@ static void __init set_ftrace_early_filters(void) set_ftrace_early_filter(ftrace_filter_buf, 1); if (ftrace_notrace_buf[0]) set_ftrace_early_filter(ftrace_notrace_buf, 0); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + if (ftrace_graph_buf[0]) + set_ftrace_early_graph(ftrace_graph_buf); +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ } static int -- cgit v0.10.2 From ad8f4356af58f7ded6b4a5787c67c7cab51066b5 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 6 Oct 2009 07:04:52 -0700 Subject: x86: Don't use the strict copy checks when branch profiling is in use The branch profiling creates very complex code for each if statement, to the point that gcc has trouble even analyzing something as simple as if (count > 5) count = 5; This then means that causing an error on code that gcc cannot analyze for copy_from_user() and co is not very productive. This patch excludes the strict copy checks in the case of branch profiling being enabled. Signed-off-by: Arjan van de Ven Cc: Steven Rostedt LKML-Reference: <20091006070452.5e1fc119@infradead.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 1bd2e36f..fb772b6 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -289,7 +289,7 @@ config OPTIMIZE_INLINING config DEBUG_STRICT_USER_COPY_CHECKS bool "Strict copy size checks" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING ---help--- Enabling this option turns a certain set of sanity checks for user copy operations into compile time failures. -- cgit v0.10.2 From def3c5d0a34e4b09b3cea4435c17209ad347104d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 14:09:07 -0700 Subject: x86: use kernel_stack_pointer() in process_32.c The way to obtain a kernel-mode stack pointer from a struct pt_regs in 32-bit mode is "subtle": the stack doesn't actually contain the stack pointer, but rather the location where it would have been marks the actual previous stack frame. For clarity, use kernel_stack_pointer() instead of coding this weirdness explicitly. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf7956..35e6fad 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -134,7 +134,7 @@ void __show_regs(struct pt_regs *regs, int all) ss = regs->ss & 0xffff; gs = get_user_gs(regs); } else { - sp = (unsigned long) (®s->sp); + sp = kernel_stack_pointer(regs); savesegment(ss, ss); savesegment(gs, gs); } -- cgit v0.10.2 From a343c75d338aa2afaea4a2a8e40de9e67b6fb4a7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 14:11:09 -0700 Subject: x86: use kernel_stack_pointer() in dumpstack.c The way to obtain a kernel-mode stack pointer from a struct pt_regs in 32-bit mode is "subtle": the stack doesn't actually contain the stack pointer, but rather the location where it would have been marks the actual previous stack frame. For clarity, use kernel_stack_pointer() instead of coding this weirdness explicitly. Furthermore, user_mode() is only valid when the process is known to not run in V86 mode. Use the safer user_mode_vm() instead. Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371..b8ce165 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -268,11 +268,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) show_registers(regs); #ifdef CONFIG_X86_32 - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { sp = regs->sp; ss = regs->ss & 0xffff; + } else { + sp = kernel_stack_pointer(regs); + savesegment(ss, ss); } printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); print_symbol("%s", regs->ip); -- cgit v0.10.2 From 5ca6c0ca5dbf105d7b0ffdae2289519982189730 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 14:12:18 -0700 Subject: x86: use kernel_stack_pointer() in kgdb.c The way to obtain a kernel-mode stack pointer from a struct pt_regs in 32-bit mode is "subtle": the stack doesn't actually contain the stack pointer, but rather the location where it would have been marks the actual previous stack frame. For clarity, use kernel_stack_pointer() instead of coding this weirdness explicitly. Signed-off-by: H. Peter Anvin Cc: Jason Wessel diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77..3310d84 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -88,7 +88,6 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; - gdb_regs[GDB_SP] = (int)®s->sp; #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -101,8 +100,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; - gdb_regs[GDB_SP] = regs->sp; #endif + gdb_regs[GDB_SP] = kernel_stack_pointer(regs); } /** -- cgit v0.10.2 From 98272ed0d2e6509fe7dc571e77956c99bf653bb6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 12 Oct 2009 14:14:10 -0700 Subject: x86: use kernel_stack_pointer() in kprobes.c The way to obtain a kernel-mode stack pointer from a struct pt_regs in 32-bit mode is "subtle": the stack doesn't actually contain the stack pointer, but rather the location where it would have been marks the actual previous stack frame. For clarity, use kernel_stack_pointer() instead of coding this weirdness explicitly. Signed-off-by: H. Peter Anvin Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: "David S. Miller" Cc: Masami Hiramatsu diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d..2ee4fa3 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -60,19 +60,7 @@ void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#ifdef CONFIG_X86_64 -#define stack_addr(regs) ((unsigned long *)regs->sp) -#else -/* - * "®s->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs - * don't save the ss and esp registers if the CPU is already in kernel - * mode when it traps. So for kprobes, regs->sp and regs->ss are not - * the [nonexistent] saved stack pointer and ss register, but rather - * the top 8 bytes of the pre-int3 stack. So ®s->sp happens to - * point to the top of the pre-int3 stack. - */ -#define stack_addr(regs) ((unsigned long *)®s->sp) -#endif +#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ -- cgit v0.10.2 From 2e06ff6389aedafc4a3a374344ac70672252f9b5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 7 Oct 2009 18:27:59 -0400 Subject: tracing/kprobes: Make special variable names more self-explainable Rename special variables to more self-explainable names as below: - $rv to $retval - $sa to $stack - $aN to $argN - $sN to $stackN Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Frank Ch. Eigler LKML-Reference: <20091007222759.1684.3319.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 4208253..1541524 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -35,13 +35,13 @@ Synopsis of kprobe_events MEMADDR : Address where the probe is inserted. FETCHARGS : Arguments. Each probe can have up to 128 args. - %REG : Fetch register REG - @ADDR : Fetch memory at ADDR (ADDR should be in kernel) + %REG : Fetch register REG + @ADDR : Fetch memory at ADDR (ADDR should be in kernel) @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol) - $sN : Fetch Nth entry of stack (N >= 0) - $sa : Fetch stack address. - $aN : Fetch function argument. (N >= 0)(*) - $rv : Fetch return value.(**) + $stackN : Fetch Nth entry of stack (N >= 0) + $stack : Fetch stack address. + $argN : Fetch function argument. (N >= 0)(*) + $retval : Fetch return value.(**) +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(***) NAME=FETCHARG: Set NAME as the argument name of FETCHARG. @@ -84,13 +84,13 @@ Usage examples To add a probe as a new event, write a new definition to kprobe_events as below. - echo p:myprobe do_sys_open dfd=$a0 filename=$a1 flags=$a2 mode=$a3 > /sys/kernel/debug/tracing/kprobe_events + echo p:myprobe do_sys_open dfd=$arg0 filename=$arg1 flags=$arg2 mode=$arg3 > /sys/kernel/debug/tracing/kprobe_events This sets a kprobe on the top of do_sys_open() function with recording 1st to 4th arguments as "myprobe" event. As this example shows, users can choose more familiar names for each arguments. - echo r:myretprobe do_sys_open $rv >> /sys/kernel/debug/tracing/kprobe_events + echo r:myretprobe do_sys_open $retval >> /sys/kernel/debug/tracing/kprobe_events This sets a kretprobe on the return point of do_sys_open() function with recording return value as "myretprobe" event. @@ -137,11 +137,11 @@ events, you need to enable it. # TASK-PID CPU# TIMESTAMP FUNCTION # | | | | | <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0 - <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $rv=fffffffffffffffe + <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6 - <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $rv=3 + <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10 - <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $rv=3 + <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3 Each line shows when the kernel hits an event, and <- SYMBOL means kernel diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ba6d3bd4..3313fa7 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -215,22 +215,22 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) int ret = -EINVAL; if (ff->func == fetch_argument) - ret = snprintf(buf, n, "$a%lu", (unsigned long)ff->data); + ret = snprintf(buf, n, "$arg%lu", (unsigned long)ff->data); else if (ff->func == fetch_register) { const char *name; name = regs_query_register_name((unsigned int)((long)ff->data)); ret = snprintf(buf, n, "%%%s", name); } else if (ff->func == fetch_stack) - ret = snprintf(buf, n, "$s%lu", (unsigned long)ff->data); + ret = snprintf(buf, n, "$stack%lu", (unsigned long)ff->data); else if (ff->func == fetch_memory) ret = snprintf(buf, n, "@0x%p", ff->data); else if (ff->func == fetch_symbol) { struct symbol_cache *sc = ff->data; ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset); } else if (ff->func == fetch_retvalue) - ret = snprintf(buf, n, "$rv"); + ret = snprintf(buf, n, "$retval"); else if (ff->func == fetch_stack_address) - ret = snprintf(buf, n, "$sa"); + ret = snprintf(buf, n, "$stack"); else if (ff->func == fetch_indirect) { struct indirect_fetch_data *id = ff->data; size_t l = 0; @@ -427,40 +427,36 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) int ret = 0; unsigned long param; - switch (arg[0]) { - case 'a': /* argument */ - ret = strict_strtoul(arg + 1, 10, ¶m); - if (ret || param > PARAM_MAX_ARGS) - ret = -EINVAL; - else { - ff->func = fetch_argument; - ff->data = (void *)param; - } - break; - case 'r': /* retval or retaddr */ - if (is_return && arg[1] == 'v') { + if (strcmp(arg, "retval") == 0) { + if (is_return) { ff->func = fetch_retvalue; ff->data = NULL; } else ret = -EINVAL; - break; - case 's': /* stack */ - if (arg[1] == 'a') { + } else if (strncmp(arg, "stack", 5) == 0) { + if (arg[5] == '\0') { ff->func = fetch_stack_address; ff->data = NULL; - } else { - ret = strict_strtoul(arg + 1, 10, ¶m); + } else if (isdigit(arg[5])) { + ret = strict_strtoul(arg + 5, 10, ¶m); if (ret || param > PARAM_MAX_STACK) ret = -EINVAL; else { ff->func = fetch_stack; ff->data = (void *)param; } + } else + ret = -EINVAL; + } else if (strncmp(arg, "arg", 3) == 0 && isdigit(arg[3])) { + ret = strict_strtoul(arg + 3, 10, ¶m); + if (ret || param > PARAM_MAX_ARGS) + ret = -EINVAL; + else { + ff->func = fetch_argument; + ff->data = (void *)param; } - break; - default: + } else ret = -EINVAL; - } return ret; } @@ -548,10 +544,10 @@ static int create_trace_probe(int argc, char **argv) * - Add kprobe: p[:[GRP/]EVENT] KSYM[+OFFS]|KADDR [FETCHARGS] * - Add kretprobe: r[:[GRP/]EVENT] KSYM[+0] [FETCHARGS] * Fetch args: - * $aN : fetch Nth of function argument. (N:0-) - * $rv : fetch return value - * $sa : fetch stack address - * $sN : fetch Nth of stack (N:0-) + * $argN : fetch Nth of function argument. (N:0-) + * $retval : fetch return value + * $stack : fetch stack address + * $stackN : fetch Nth of stack (N:0-) * @ADDR : fetch memory at ADDR (ADDR should be in kernel) * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol) * %REG : fetch register REG -- cgit v0.10.2 From a703d946e883d8e447d0597de556e2effd110372 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 7 Oct 2009 18:28:07 -0400 Subject: tracing/kprobes: Avoid field name confliction Check whether the argument name is in conflict with other field names while creating a kprobe through the debugfs interface. Changes in v3: - Check strcmp() == 0 instead of !strcmp(). Changes in v2: - Add common_lock_depth to reserved name list. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Frank Ch. Eigler LKML-Reference: <20091007222807.1684.26880.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3313fa7..bb6cb2b 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -38,6 +38,25 @@ #define MAX_EVENT_NAME_LEN 64 #define KPROBE_EVENT_SYSTEM "kprobes" +/* Reserved field names */ +#define FIELD_STRING_IP "ip" +#define FIELD_STRING_NARGS "nargs" +#define FIELD_STRING_RETIP "ret_ip" +#define FIELD_STRING_FUNC "func" + +const char *reserved_field_names[] = { + "common_type", + "common_flags", + "common_preempt_count", + "common_pid", + "common_tgid", + "common_lock_depth", + FIELD_STRING_IP, + FIELD_STRING_NARGS, + FIELD_STRING_RETIP, + FIELD_STRING_FUNC, +}; + /* currently, trace_kprobe only supports X86. */ struct fetch_func { @@ -537,6 +556,20 @@ static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) return ret; } +/* Return 1 if name is reserved or already used by another argument */ +static int conflict_field_name(const char *name, + struct probe_arg *args, int narg) +{ + int i; + for (i = 0; i < ARRAY_SIZE(reserved_field_names); i++) + if (strcmp(reserved_field_names[i], name) == 0) + return 1; + for (i = 0; i < narg; i++) + if (strcmp(args[i].name, name) == 0) + return 1; + return 0; +} + static int create_trace_probe(int argc, char **argv) { /* @@ -637,6 +670,12 @@ static int create_trace_probe(int argc, char **argv) *arg++ = '\0'; else arg = argv[i]; + + if (conflict_field_name(argv[i], tp->args, i)) { + ret = -EINVAL; + goto error; + } + tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); /* Parse fetch argument */ @@ -1039,8 +1078,8 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) if (!ret) return ret; - DEFINE_FIELD(unsigned long, ip, "ip", 0); - DEFINE_FIELD(int, nargs, "nargs", 1); + DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); + DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); /* Set argument names as fields */ for (i = 0; i < tp->nr_args; i++) DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); @@ -1057,9 +1096,9 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) if (!ret) return ret; - DEFINE_FIELD(unsigned long, func, "func", 0); - DEFINE_FIELD(unsigned long, ret_ip, "ret_ip", 0); - DEFINE_FIELD(int, nargs, "nargs", 1); + DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); + DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0); + DEFINE_FIELD(int, nargs, FIELD_STRING_NARGS, 1); /* Set argument names as fields */ for (i = 0; i < tp->nr_args; i++) DEFINE_FIELD(unsigned long, args[i], tp->args[i].name, 0); @@ -1108,15 +1147,16 @@ static int kprobe_event_show_format(struct ftrace_event_call *call, int ret, i; struct trace_probe *tp = (struct trace_probe *)call->data; - SHOW_FIELD(unsigned long, ip, "ip"); - SHOW_FIELD(int, nargs, "nargs"); + SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); + SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); /* Show fields */ for (i = 0; i < tp->nr_args; i++) SHOW_FIELD(unsigned long, args[i], tp->args[i].name); trace_seq_puts(s, "\n"); - return __probe_event_show_format(s, tp, "(%lx)", "REC->ip"); + return __probe_event_show_format(s, tp, "(%lx)", + "REC->" FIELD_STRING_IP); } static int kretprobe_event_show_format(struct ftrace_event_call *call, @@ -1126,9 +1166,9 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, int ret, i; struct trace_probe *tp = (struct trace_probe *)call->data; - SHOW_FIELD(unsigned long, func, "func"); - SHOW_FIELD(unsigned long, ret_ip, "ret_ip"); - SHOW_FIELD(int, nargs, "nargs"); + SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); + SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); + SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); /* Show fields */ for (i = 0; i < tp->nr_args; i++) @@ -1136,7 +1176,8 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, trace_seq_puts(s, "\n"); return __probe_event_show_format(s, tp, "(%lx <- %lx)", - "REC->func, REC->ret_ip"); + "REC->" FIELD_STRING_FUNC + ", REC->" FIELD_STRING_RETIP); } #ifdef CONFIG_EVENT_PROFILE -- cgit v0.10.2 From e93f4d8539d5e9dd59f4af9d8ef4e9b62cfa1f81 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 7 Oct 2009 18:28:14 -0400 Subject: tracing/kprobes: Robustify fixed field names against variable field names conflicts Rename probe-common fixed field names to harder conflictable names, because current 'ip', 'func', and other probe field names are easily in conflict with user-specified variable names. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Frank Ch. Eigler LKML-Reference: <20091007222814.1684.407.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index bb6cb2b..739f70e 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -39,10 +39,10 @@ #define KPROBE_EVENT_SYSTEM "kprobes" /* Reserved field names */ -#define FIELD_STRING_IP "ip" -#define FIELD_STRING_NARGS "nargs" -#define FIELD_STRING_RETIP "ret_ip" -#define FIELD_STRING_FUNC "func" +#define FIELD_STRING_IP "__probe_ip" +#define FIELD_STRING_NARGS "__probe_nargs" +#define FIELD_STRING_RETIP "__probe_ret_ip" +#define FIELD_STRING_FUNC "__probe_func" const char *reserved_field_names[] = { "common_type", -- cgit v0.10.2 From 4ea42b181434bfc6a0a18d32214130a242d489bf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 8 Oct 2009 17:17:38 -0400 Subject: perf: Add perf probe subcommand, a kprobe-event setup helper Add perf probe subcommand that implements a kprobe-event setup helper to the perf command. This allows user to define kprobe events using C expressions (C line numbers, C function names, and C local variables). Usage ----- perf probe [] -P 'PROBEDEF' [-P 'PROBEDEF' ...] -k, --vmlinux vmlinux/module pathname -P, --probe probe point definition, where p: kprobe probe r: kretprobe probe GRP: Group name (optional) NAME: Event name FUNC: Function name OFFS: Offset from function entry (in byte) SRC: Source code path LINE: Line number ARG: Probe argument (local variable name or kprobe-tracer argument format is supported.) Changes in v4: - Add _GNU_SOURCE macro for strndup(). Changes in v3: - Remove -r option because perf always be used for online kernel. - Check malloc/calloc results. Changes in v2: - Check synthesized string length. - Rename perf kprobe to perf probe. - Use spaces for separator and update usage comment. - Check error paths in parse_probepoint(). - Check optimized-out variables. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Frank Ch. Eigler LKML-Reference: <20091008211737.29299.14784.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/tools/perf/Makefile b/tools/perf/Makefile index b5f1953..6dabcf1 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -419,6 +419,16 @@ ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel); endif +ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) + msg := $(warning No libdwarf.h found, disables probe subcommand. Please install libdwarf-dev/libdwarf-devel); +else + EXTLIBS += -lelf -ldwarf + LIB_H += util/probe-finder.h + LIB_OBJS += util/probe-finder.o + BUILTIN_OBJS += builtin-probe.o + BASIC_CFLAGS += -DSUPPORT_DWARF +endif + ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c new file mode 100644 index 0000000..24b64b5 --- /dev/null +++ b/tools/perf/builtin-probe.c @@ -0,0 +1,358 @@ +/* + * builtin-probe.c + * + * Builtin probe command: Set up probe events by C expression + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef _GNU_SOURCE +#include "perf.h" +#include "builtin.h" +#include "util/util.h" +#include "util/parse-options.h" +#include "util/parse-events.h" /* For debugfs_path */ +#include "util/probe-finder.h" + +/* Default vmlinux search paths */ +#define NR_SEARCH_PATH 3 +const char *default_search_path[NR_SEARCH_PATH] = { +"/lib/modules/%s/build/vmlinux", /* Custom build kernel */ +"/usr/lib/debug/lib/modules/%s/vmlinux", /* Red Hat debuginfo */ +"/boot/vmlinux-debug-%s", /* Ubuntu */ +}; + +#define MAX_PATH_LEN 256 +#define MAX_PROBES 128 + +/* Session management structure */ +static struct { + char *vmlinux; + char *release; + int nr_probe; + struct probe_point probes[MAX_PROBES]; + char *events[MAX_PROBES]; +} session; + +static void semantic_error(const char *msg) +{ + fprintf(stderr, "Semantic error: %s\n", msg); + exit(1); +} + +static void perror_exit(const char *msg) +{ + perror(msg); + exit(1); +} + +#define MAX_PROBE_ARGS 128 + +static int parse_probepoint(const struct option *opt __used, + const char *str, int unset __used) +{ + char *argv[MAX_PROBE_ARGS + 2]; /* Event + probe + args */ + int argc, i; + char *arg, *ptr; + struct probe_point *pp = &session.probes[session.nr_probe]; + char **event = &session.events[session.nr_probe]; + int retp = 0; + + if (!str) /* The end of probe points */ + return 0; + + debug("Probe-define(%d): %s\n", session.nr_probe, str); + if (++session.nr_probe == MAX_PROBES) + semantic_error("Too many probes"); + + /* Separate arguments, similar to argv_split */ + argc = 0; + do { + /* Skip separators */ + while (isspace(*str)) + str++; + + /* Add an argument */ + if (*str != '\0') { + const char *s = str; + + /* Skip the argument */ + while (!isspace(*str) && *str != '\0') + str++; + + /* Duplicate the argument */ + argv[argc] = strndup(s, str - s); + if (argv[argc] == NULL) + perror_exit("strndup"); + if (++argc == MAX_PROBE_ARGS) + semantic_error("Too many arguments"); + debug("argv[%d]=%s\n", argc, argv[argc - 1]); + } + } while (*str != '\0'); + if (argc < 2) + semantic_error("Need event-name and probe-point at least."); + + /* Parse the event name */ + if (argv[0][0] == 'r') + retp = 1; + else if (argv[0][0] != 'p') + semantic_error("You must specify 'p'(kprobe) or" + " 'r'(kretprobe) first."); + /* TODO: check event name */ + *event = argv[0]; + + /* Parse probe point */ + arg = argv[1]; + if (arg[0] == '@') { + /* Source Line */ + arg++; + ptr = strchr(arg, ':'); + if (!ptr || !isdigit(ptr[1])) + semantic_error("Line number is required."); + *ptr++ = '\0'; + if (strlen(arg) == 0) + semantic_error("No file name."); + pp->file = strdup(arg); + pp->line = atoi(ptr); + if (!pp->file || !pp->line) + semantic_error("Failed to parse line."); + debug("file:%s line:%d\n", pp->file, pp->line); + } else { + /* Function name */ + ptr = strchr(arg, '+'); + if (ptr) { + if (!isdigit(ptr[1])) + semantic_error("Offset is required."); + *ptr++ = '\0'; + pp->offset = atoi(ptr); + } else + ptr = arg; + ptr = strchr(ptr, '@'); + if (ptr) { + *ptr++ = '\0'; + pp->file = strdup(ptr); + } + pp->function = strdup(arg); + debug("symbol:%s file:%s offset:%d\n", + pp->function, pp->file, pp->offset); + } + free(argv[1]); + + /* Copy arguments */ + pp->nr_args = argc - 2; + if (pp->nr_args > 0) { + pp->args = (char **)malloc(sizeof(char *) * pp->nr_args); + if (!pp->args) + perror_exit("malloc"); + memcpy(pp->args, &argv[2], sizeof(char *) * pp->nr_args); + } + + /* Ensure return probe has no C argument */ + if (retp) + for (i = 0; i < pp->nr_args; i++) + if (is_c_varname(pp->args[i])) + semantic_error("You can't specify local" + " variable for kretprobe"); + debug("%d arguments\n", pp->nr_args); + return 0; +} + +static int open_default_vmlinux(void) +{ + struct utsname uts; + char fname[MAX_PATH_LEN]; + int fd, ret, i; + + ret = uname(&uts); + if (ret) { + debug("uname() failed.\n"); + return -errno; + } + session.release = uts.release; + for (i = 0; i < NR_SEARCH_PATH; i++) { + ret = snprintf(fname, MAX_PATH_LEN, + default_search_path[i], session.release); + if (ret >= MAX_PATH_LEN || ret < 0) { + debug("Filename(%d,%s) is too long.\n", i, uts.release); + errno = E2BIG; + return -E2BIG; + } + debug("try to open %s\n", fname); + fd = open(fname, O_RDONLY); + if (fd >= 0) + break; + } + return fd; +} + +static const char * const probe_usage[] = { + "perf probe [] -P 'PROBEDEF' [-P 'PROBEDEF' ...]", + NULL +}; + +static const struct option options[] = { + OPT_STRING('k', "vmlinux", &session.vmlinux, "file", + "vmlinux/module pathname"), + OPT_CALLBACK('P', "probe", NULL, + "p|r:[GRP/]NAME FUNC[+OFFS][@SRC]|@SRC:LINE [ARG ...]", + "probe point definition, where\n" + "\t\tp:\tkprobe probe\n" + "\t\tr:\tkretprobe probe\n" + "\t\tGRP:\tGroup name (optional)\n" + "\t\tNAME:\tEvent name\n" + "\t\tFUNC:\tFunction name\n" + "\t\tOFFS:\tOffset from function entry (in byte)\n" + "\t\tSRC:\tSource code path\n" + "\t\tLINE:\tLine number\n" + "\t\tARG:\tProbe argument (local variable name or\n" + "\t\t\tkprobe-tracer argument format is supported.)\n", + parse_probepoint), + OPT_END() +}; + +static int write_new_event(int fd, const char *buf) +{ + int ret; + + printf("Adding new event: %s\n", buf); + ret = write(fd, buf, strlen(buf)); + if (ret <= 0) + perror("Error: Failed to create event"); + + return ret; +} + +#define MAX_CMDLEN 256 + +static int synthesize_probepoint(struct probe_point *pp) +{ + char *buf; + int i, len, ret; + pp->probes[0] = buf = (char *)calloc(MAX_CMDLEN, sizeof(char)); + if (!buf) + perror_exit("calloc"); + ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); + if (ret <= 0 || ret >= MAX_CMDLEN) + goto error; + len = ret; + + for (i = 0; i < pp->nr_args; i++) { + ret = snprintf(&buf[len], MAX_CMDLEN - len, " %s", + pp->args[i]); + if (ret <= 0 || ret >= MAX_CMDLEN - len) + goto error; + len += ret; + } + pp->found = 1; + return pp->found; +error: + free(pp->probes[0]); + if (ret > 0) + ret = -E2BIG; + return ret; +} + +int cmd_probe(int argc, const char **argv, const char *prefix __used) +{ + int i, j, fd, ret, need_dwarf = 0; + struct probe_point *pp; + char buf[MAX_CMDLEN]; + + argc = parse_options(argc, argv, options, probe_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc || session.nr_probe == 0) + usage_with_options(probe_usage, options); + + /* Synthesize return probes */ + for (j = 0; j < session.nr_probe; j++) { + if (session.events[j][0] != 'r') { + need_dwarf = 1; + continue; + } + ret = synthesize_probepoint(&session.probes[j]); + if (ret == -E2BIG) + semantic_error("probe point is too long."); + else if (ret < 0) { + perror("snprintf"); + return -1; + } + } + + if (!need_dwarf) + goto setup_probes; + + if (session.vmlinux) + fd = open(session.vmlinux, O_RDONLY); + else + fd = open_default_vmlinux(); + if (fd < 0) { + perror("vmlinux/module file open"); + return -1; + } + + /* Searching probe points */ + for (j = 0; j < session.nr_probe; j++) { + pp = &session.probes[j]; + if (pp->found) + continue; + + lseek(fd, SEEK_SET, 0); + ret = find_probepoint(fd, pp); + if (ret <= 0) { + fprintf(stderr, "Error: No probe point found.\n"); + return -1; + } + debug("probe event %s found\n", session.events[j]); + } + close(fd); + +setup_probes: + /* Settng up probe points */ + snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); + fd = open(buf, O_WRONLY, O_APPEND); + if (fd < 0) { + perror("kprobe_events open"); + return -1; + } + for (j = 0; j < session.nr_probe; j++) { + pp = &session.probes[j]; + if (pp->found == 1) { + snprintf(buf, MAX_CMDLEN, "%s %s\n", + session.events[j], pp->probes[0]); + write_new_event(fd, buf); + } else + for (i = 0; i < pp->found; i++) { + snprintf(buf, MAX_CMDLEN, "%s%d %s\n", + session.events[j], i, pp->probes[i]); + write_new_event(fd, buf); + } + } + close(fd); + return 0; +} + diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index e11d8d2..ad5f0f4 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -25,5 +25,6 @@ extern int cmd_timechart(int argc, const char **argv, const char *prefix); extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); +extern int cmd_probe(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 19fc7fe..f598120 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -295,6 +295,9 @@ static void handle_internal_command(int argc, const char **argv) { "version", cmd_version, 0 }, { "trace", cmd_trace, 0 }, { "sched", cmd_sched, 0 }, +#ifdef SUPPORT_DWARF + { "probe", cmd_probe, 0 }, +#endif }; unsigned int i; static const char ext[] = STRIP_EXTENSION; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c new file mode 100644 index 0000000..ec6f53f --- /dev/null +++ b/tools/perf/util/probe-finder.c @@ -0,0 +1,690 @@ +/* + * probe-finder.c : C expression to kprobe event converter + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "probe-finder.h" + + +/* Dwarf_Die Linkage to parent Die */ +struct die_link { + struct die_link *parent; /* Parent die */ + Dwarf_Die die; /* Current die */ +}; + +static Dwarf_Debug __dw_debug; +static Dwarf_Error __dw_error; + +static void msg_exit(int ret, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "Error: "); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "\n"); + exit(ret); +} + + +/* + * Generic dwarf analysis helpers + */ + +#define X86_32_MAX_REGS 8 +const char *x86_32_regs_table[X86_32_MAX_REGS] = { + "%ax", + "%cx", + "%dx", + "%bx", + "$stack", /* Stack address instead of %sp */ + "%bp", + "%si", + "%di", +}; + +#define X86_64_MAX_REGS 16 +const char *x86_64_regs_table[X86_64_MAX_REGS] = { + "%ax", + "%dx", + "%cx", + "%bx", + "%si", + "%di", + "%bp", + "%sp", + "%r8", + "%r9", + "%r10", + "%r11", + "%r12", + "%r13", + "%r14", + "%r15", +}; + +/* TODO: switching by dwarf address size */ +#ifdef __x86_64__ +#define ARCH_MAX_REGS X86_64_MAX_REGS +#define arch_regs_table x86_64_regs_table +#else +#define ARCH_MAX_REGS X86_32_MAX_REGS +#define arch_regs_table x86_32_regs_table +#endif + +/* Return architecture dependent register string (for kprobe-tracer) */ +static const char *get_arch_regstr(unsigned int n) +{ + return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; +} + +/* + * Compare the tail of two strings. + * Return 0 if whole of either string is same as another's tail part. + */ +static int strtailcmp(const char *s1, const char *s2) +{ + int i1 = strlen(s1); + int i2 = strlen(s2); + while (--i1 > 0 && --i2 > 0) { + if (s1[i1] != s2[i2]) + return s1[i1] - s2[i2]; + } + return 0; +} + +/* Find the fileno of the target file. */ +static Dwarf_Unsigned die_get_fileno(Dwarf_Die cu_die, const char *fname) +{ + Dwarf_Signed cnt, i; + Dwarf_Unsigned found = 0; + char **srcs; + int ret; + + if (!fname) + return 0; + + ret = dwarf_srcfiles(cu_die, &srcs, &cnt, &__dw_error); + if (ret == DW_DLV_OK) { + for (i = 0; i < cnt && !found; i++) { + if (strtailcmp(srcs[i], fname) == 0) + found = i + 1; + dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); + } + for (; i < cnt; i++) + dwarf_dealloc(__dw_debug, srcs[i], DW_DLA_STRING); + dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST); + } + if (found) + debug("found fno: %d\n", (int)found); + return found; +} + +/* Compare diename and tname */ +static int die_compare_name(Dwarf_Die die, const char *tname) +{ + char *name; + int ret; + ret = dwarf_diename(die, &name, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = strcmp(tname, name); + dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); + } else + ret = -1; + return ret; +} + +/* Check the address is in the subprogram(function). */ +static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr, + Dwarf_Signed *offs) +{ + Dwarf_Addr lopc, hipc; + int ret; + + /* TODO: check ranges */ + ret = dwarf_lowpc(sp_die, &lopc, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + return 0; + ret = dwarf_highpc(sp_die, &hipc, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + if (lopc <= addr && addr < hipc) { + *offs = addr - lopc; + return 1; + } else + return 0; +} + +/* Check the die is inlined function */ +static Dwarf_Bool die_inlined_subprogram(Dwarf_Die die) +{ + /* TODO: check strictly */ + Dwarf_Bool inl; + int ret; + + ret = dwarf_hasattr(die, DW_AT_inline, &inl, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + return inl; +} + +/* Get the offset of abstruct_origin */ +static Dwarf_Off die_get_abstract_origin(Dwarf_Die die) +{ + Dwarf_Attribute attr; + Dwarf_Off cu_offs; + int ret; + + ret = dwarf_attr(die, DW_AT_abstract_origin, &attr, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + ret = dwarf_formref(attr, &cu_offs, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return cu_offs; +} + +/* Get entry pc(or low pc, 1st entry of ranges) of the die */ +static Dwarf_Addr die_get_entrypc(Dwarf_Die die) +{ + Dwarf_Attribute attr; + Dwarf_Addr addr; + Dwarf_Off offs; + Dwarf_Ranges *ranges; + Dwarf_Signed cnt; + int ret; + + /* Try to get entry pc */ + ret = dwarf_attr(die, DW_AT_entry_pc, &attr, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = dwarf_formaddr(attr, &addr, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return addr; + } + + /* Try to get low pc */ + ret = dwarf_lowpc(die, &addr, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) + return addr; + + /* Try to get ranges */ + ret = dwarf_attr(die, DW_AT_ranges, &attr, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + ret = dwarf_formref(attr, &offs, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL, + &__dw_error); + ERR_IF(ret != DW_DLV_OK); + addr = ranges[0].dwr_addr1; + dwarf_ranges_dealloc(__dw_debug, ranges, cnt); + return addr; +} + +/* + * Search a Die from Die tree. + * Note: cur_link->die should be deallocated in this function. + */ +static int __search_die_tree(struct die_link *cur_link, + int (*die_cb)(struct die_link *, void *), + void *data) +{ + Dwarf_Die new_die; + struct die_link new_link; + int ret; + + if (!die_cb) + return 0; + + /* Check current die */ + while (!(ret = die_cb(cur_link, data))) { + /* Check child die */ + ret = dwarf_child(cur_link->die, &new_die, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + new_link.parent = cur_link; + new_link.die = new_die; + ret = __search_die_tree(&new_link, die_cb, data); + if (ret) + break; + } + + /* Move to next sibling */ + ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die, + &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE); + cur_link->die = new_die; + if (ret == DW_DLV_NO_ENTRY) + return 0; + } + dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE); + return ret; +} + +/* Search a die in its children's die tree */ +static int search_die_from_children(Dwarf_Die parent_die, + int (*die_cb)(struct die_link *, void *), + void *data) +{ + struct die_link new_link; + int ret; + + new_link.parent = NULL; + ret = dwarf_child(parent_die, &new_link.die, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) + return __search_die_tree(&new_link, die_cb, data); + else + return 0; +} + +/* Find a locdesc corresponding to the address */ +static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc, + Dwarf_Addr addr) +{ + Dwarf_Signed lcnt; + Dwarf_Locdesc **llbuf; + int ret, i; + + ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + ret = DW_DLV_NO_ENTRY; + for (i = 0; i < lcnt; ++i) { + if (llbuf[i]->ld_lopc <= addr && + llbuf[i]->ld_hipc > addr) { + memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc)); + desc->ld_s = + malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); + ERR_IF(desc->ld_s == NULL); + memcpy(desc->ld_s, llbuf[i]->ld_s, + sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); + ret = DW_DLV_OK; + break; + } + dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); + dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC); + } + /* Releasing loop */ + for (; i < lcnt; ++i) { + dwarf_dealloc(__dw_debug, llbuf[i]->ld_s, DW_DLA_LOC_BLOCK); + dwarf_dealloc(__dw_debug, llbuf[i], DW_DLA_LOCDESC); + } + dwarf_dealloc(__dw_debug, llbuf, DW_DLA_LIST); + return ret; +} + +/* + * Probe finder related functions + */ + +/* Show a location */ +static void show_location(Dwarf_Loc *loc, struct probe_finder *pf) +{ + Dwarf_Small op; + Dwarf_Unsigned regn; + Dwarf_Signed offs; + int deref = 0, ret; + const char *regs; + + op = loc->lr_atom; + + /* If this is based on frame buffer, set the offset */ + if (op == DW_OP_fbreg) { + deref = 1; + offs = (Dwarf_Signed)loc->lr_number; + op = pf->fbloc.ld_s[0].lr_atom; + loc = &pf->fbloc.ld_s[0]; + } else + offs = 0; + + if (op >= DW_OP_breg0 && op <= DW_OP_breg31) { + regn = op - DW_OP_breg0; + offs += (Dwarf_Signed)loc->lr_number; + deref = 1; + } else if (op >= DW_OP_reg0 && op <= DW_OP_reg31) { + regn = op - DW_OP_reg0; + } else if (op == DW_OP_bregx) { + regn = loc->lr_number; + offs += (Dwarf_Signed)loc->lr_number2; + deref = 1; + } else if (op == DW_OP_regx) { + regn = loc->lr_number; + } else + msg_exit(-EINVAL, "Dwarf_OP %d is not supported.\n", op); + + regs = get_arch_regstr(regn); + if (!regs) + msg_exit(-EINVAL, "%lld exceeds max register number.\n", regn); + + if (deref) + ret = snprintf(pf->buf, pf->len, + " %s=%+lld(%s)", pf->var, offs, regs); + else + ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); + ERR_IF(ret < 0); + ERR_IF(ret >= pf->len); +} + +/* Show a variables in kprobe event format */ +static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf) +{ + Dwarf_Attribute attr; + Dwarf_Locdesc ld; + int ret; + + ret = dwarf_attr(vr_die, DW_AT_location, &attr, &__dw_error); + if (ret != DW_DLV_OK) + goto error; + ret = attr_get_locdesc(attr, &ld, (pf->addr - pf->cu_base)); + if (ret != DW_DLV_OK) + goto error; + /* TODO? */ + ERR_IF(ld.ld_cents != 1); + show_location(&ld.ld_s[0], pf); + free(ld.ld_s); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return ; +error: + msg_exit(-1, "Failed to find the location of %s at this address.\n" + " Perhaps, it was optimized out.\n", pf->var); +} + +static int variable_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + Dwarf_Half tag; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if ((tag == DW_TAG_formal_parameter || + tag == DW_TAG_variable) && + (die_compare_name(dlink->die, pf->var) == 0)) { + show_variable(dlink->die, pf); + return 1; + } + /* TODO: Support struct members and arrays */ + return 0; +} + +/* Find a variable in a subprogram die */ +static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) +{ + int ret; + + if (!is_c_varname(pf->var)) { + /* Output raw parameters */ + ret = snprintf(pf->buf, pf->len, " %s", pf->var); + ERR_IF(ret < 0); + ERR_IF(ret >= pf->len); + return ; + } + + debug("Searching '%s' variable in context.\n", pf->var); + /* Search child die for local variables and parameters. */ + ret = search_die_from_children(sp_die, variable_callback, pf); + if (!ret) + msg_exit(-1, "Failed to find '%s' in this function.\n", + pf->var); +} + +/* Get a frame base on the address */ +static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf) +{ + Dwarf_Attribute attr; + int ret; + + ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base)); + ERR_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); +} + +static void free_current_frame_base(struct probe_finder *pf) +{ + free(pf->fbloc.ld_s); + memset(&pf->fbloc, 0, sizeof(Dwarf_Locdesc)); +} + +/* Show a probe point to output buffer */ +static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs, + struct probe_finder *pf) +{ + struct probe_point *pp = pf->pp; + char *name; + char tmp[MAX_PROBE_BUFFER]; + int ret, i, len; + + /* Output name of probe point */ + ret = dwarf_diename(sp_die, &name, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_OK) { + ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name, + (unsigned int)offs); + dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); + } else { + /* This function has no name. */ + ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr); + } + ERR_IF(ret < 0); + ERR_IF(ret >= MAX_PROBE_BUFFER); + len = ret; + + /* Find each argument */ + get_current_frame_base(sp_die, pf); + for (i = 0; i < pp->nr_args; i++) { + pf->var = pp->args[i]; + pf->buf = &tmp[len]; + pf->len = MAX_PROBE_BUFFER - len; + find_variable(sp_die, pf); + len += strlen(pf->buf); + } + free_current_frame_base(pf); + + pp->probes[pp->found] = strdup(tmp); + pp->found++; +} + +static int probeaddr_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + Dwarf_Half tag; + Dwarf_Signed offs; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + /* Check the address is in this subprogram */ + if (tag == DW_TAG_subprogram && + die_within_subprogram(dlink->die, pf->addr, &offs)) { + show_probepoint(dlink->die, offs, pf); + return 1; + } + return 0; +} + +/* Find probe point from its line number */ +static void find_by_line(Dwarf_Die cu_die, struct probe_finder *pf) +{ + struct probe_point *pp = pf->pp; + Dwarf_Signed cnt, i; + Dwarf_Line *lines; + Dwarf_Unsigned lineno = 0; + Dwarf_Addr addr; + Dwarf_Unsigned fno; + int ret; + + ret = dwarf_srclines(cu_die, &lines, &cnt, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + + for (i = 0; i < cnt; i++) { + ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + if (fno != pf->fno) + continue; + + ret = dwarf_lineno(lines[i], &lineno, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + if (lineno != (Dwarf_Unsigned)pp->line) + continue; + + ret = dwarf_lineaddr(lines[i], &addr, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + debug("Probe point found: 0x%llx\n", addr); + pf->addr = addr; + /* Search a real subprogram including this line, */ + ret = search_die_from_children(cu_die, probeaddr_callback, pf); + if (ret == 0) + msg_exit(-1, + "Probe point is not found in subprograms.\n"); + /* Continuing, because target line might be inlined. */ + } + dwarf_srclines_dealloc(__dw_debug, lines, cnt); +} + +/* Search function from function name */ +static int probefunc_callback(struct die_link *dlink, void *data) +{ + struct probe_finder *pf = (struct probe_finder *)data; + struct probe_point *pp = pf->pp; + struct die_link *lk; + Dwarf_Signed offs; + Dwarf_Half tag; + int ret; + + ret = dwarf_tag(dlink->die, &tag, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (tag == DW_TAG_subprogram) { + if (die_compare_name(dlink->die, pp->function) == 0) { + if (die_inlined_subprogram(dlink->die)) { + /* Inlined function, save it. */ + ret = dwarf_die_CU_offset(dlink->die, + &pf->inl_offs, + &__dw_error); + ERR_IF(ret != DW_DLV_OK); + debug("inline definition offset %lld\n", + pf->inl_offs); + return 0; + } + /* Get probe address */ + pf->addr = die_get_entrypc(dlink->die); + pf->addr += pp->offset; + /* TODO: Check the address in this function */ + show_probepoint(dlink->die, pp->offset, pf); + /* Continue to search */ + } + } else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) { + if (die_get_abstract_origin(dlink->die) == pf->inl_offs) { + /* Get probe address */ + pf->addr = die_get_entrypc(dlink->die); + pf->addr += pp->offset; + debug("found inline addr: 0x%llx\n", pf->addr); + /* Inlined function. Get a real subprogram */ + for (lk = dlink->parent; lk != NULL; lk = lk->parent) { + tag = 0; + dwarf_tag(lk->die, &tag, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (tag == DW_TAG_subprogram && + !die_inlined_subprogram(lk->die)) + goto found; + } + msg_exit(-1, "Failed to find real subprogram.\n"); +found: + /* Get offset from subprogram */ + ret = die_within_subprogram(lk->die, pf->addr, &offs); + ERR_IF(!ret); + show_probepoint(lk->die, offs, pf); + /* Continue to search */ + } + } + return 0; +} + +static void find_by_func(Dwarf_Die cu_die, struct probe_finder *pf) +{ + search_die_from_children(cu_die, probefunc_callback, pf); +} + +/* Find a probe point */ +int find_probepoint(int fd, struct probe_point *pp) +{ + Dwarf_Half addr_size = 0; + Dwarf_Unsigned next_cuh = 0; + Dwarf_Die cu_die = 0; + int cu_number = 0, ret; + struct probe_finder pf = {.pp = pp}; + + ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error); + if (ret != DW_DLV_OK) + msg_exit(-1, "Failed to call dwarf_init(). " + "Maybe, not a dwarf file?\n"); + + pp->found = 0; + while (++cu_number) { + /* Search CU (Compilation Unit) */ + ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL, + &addr_size, &next_cuh, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + break; + + /* Get the DIE(Debugging Information Entry) of this CU */ + ret = dwarf_siblingof(__dw_debug, 0, &cu_die, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + + /* Check if target file is included. */ + if (pp->file) + pf.fno = die_get_fileno(cu_die, pp->file); + + if (!pp->file || pf.fno) { + /* Save CU base address (for frame_base) */ + ret = dwarf_lowpc(cu_die, &pf.cu_base, &__dw_error); + ERR_IF(ret == DW_DLV_ERROR); + if (ret == DW_DLV_NO_ENTRY) + pf.cu_base = 0; + if (pp->line) + find_by_line(cu_die, &pf); + if (pp->function) + find_by_func(cu_die, &pf); + } + dwarf_dealloc(__dw_debug, cu_die, DW_DLA_DIE); + } + ret = dwarf_finish(__dw_debug, &__dw_error); + ERR_IF(ret != DW_DLV_OK); + + return pp->found; +} + diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h new file mode 100644 index 0000000..af920de --- /dev/null +++ b/tools/perf/util/probe-finder.h @@ -0,0 +1,68 @@ +#ifndef _PROBE_FINDER_H +#define _PROBE_FINDER_H + +#define _stringify(n) #n +#define stringify(n) _stringify(n) + +#ifdef DEBUG +#define debug(fmt ...) \ + fprintf(stderr, "DBG(" __FILE__ ":" stringify(__LINE__) "): " fmt) +#else +#define debug(fmt ...) do {} while (0) +#endif + +#define ERR_IF(cnd) \ + do { if (cnd) { \ + fprintf(stderr, "Error (" __FILE__ ":" stringify(__LINE__) \ + "): " stringify(cnd) "\n"); \ + exit(1); \ + } } while (0) + +#define MAX_PATH_LEN 256 +#define MAX_PROBE_BUFFER 1024 +#define MAX_PROBES 128 + +static inline int is_c_varname(const char *name) +{ + /* TODO */ + return isalpha(name[0]) || name[0] == '_'; +} + +struct probe_point { + /* Inputs */ + char *file; /* File name */ + int line; /* Line number */ + + char *function; /* Function name */ + int offset; /* Offset bytes */ + + int nr_args; /* Number of arguments */ + char **args; /* Arguments */ + + /* Output */ + int found; /* Number of found probe points */ + char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ +}; + +extern int find_probepoint(int fd, struct probe_point *pp); + +#include +#include + +struct probe_finder { + struct probe_point *pp; /* Target probe point */ + + /* For function searching */ + Dwarf_Addr addr; /* Address */ + Dwarf_Unsigned fno; /* File number */ + Dwarf_Off inl_offs; /* Inline offset */ + + /* For variable searching */ + Dwarf_Addr cu_base; /* Current CU base address */ + Dwarf_Locdesc fbloc; /* Location of Current Frame Base */ + const char *var; /* Current variable name */ + char *buf; /* Current output buffer */ + int len; /* Length of output buffer */ +}; + +#endif /*_PROBE_FINDER_H */ -- cgit v0.10.2 From 23e8ec0d1c410f2f1d81050ee155db229abb1707 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 7 Oct 2009 18:28:30 -0400 Subject: perf probe: Add perf probe command support without libdwarf Enables 'perf probe' even if libdwarf is not installed. If libdwarf is not found, 'perf probe' just disables dwarf support. Users can use 'perf probe' to set up new events by using kprobe_events format. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Frank Ch. Eigler LKML-Reference: <20091007222830.1684.25665.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Frederic Weisbecker diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 6dabcf1..52b1f43 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -385,6 +385,7 @@ BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-timechart.o BUILTIN_OBJS += builtin-top.o BUILTIN_OBJS += builtin-trace.o +BUILTIN_OBJS += builtin-probe.o PERFLIBS = $(LIB_FILE) @@ -420,13 +421,12 @@ ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * endif ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) - msg := $(warning No libdwarf.h found, disables probe subcommand. Please install libdwarf-dev/libdwarf-devel); + msg := $(warning No libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel); + BASIC_CFLAGS += -DNO_LIBDWARF else EXTLIBS += -lelf -ldwarf LIB_H += util/probe-finder.h LIB_OBJS += util/probe-finder.o - BUILTIN_OBJS += builtin-probe.o - BASIC_CFLAGS += -DSUPPORT_DWARF endif ifdef NO_DEMANGLE diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 24b64b5..73c883b 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -54,6 +54,7 @@ const char *default_search_path[NR_SEARCH_PATH] = { static struct { char *vmlinux; char *release; + int need_dwarf; int nr_probe; struct probe_point probes[MAX_PROBES]; char *events[MAX_PROBES]; @@ -162,6 +163,8 @@ static int parse_probepoint(const struct option *opt __used, pp->function, pp->file, pp->offset); } free(argv[1]); + if (pp->file) + session.need_dwarf = 1; /* Copy arguments */ pp->nr_args = argc - 2; @@ -173,15 +176,19 @@ static int parse_probepoint(const struct option *opt __used, } /* Ensure return probe has no C argument */ - if (retp) - for (i = 0; i < pp->nr_args; i++) - if (is_c_varname(pp->args[i])) + for (i = 0; i < pp->nr_args; i++) + if (is_c_varname(pp->args[i])) { + if (retp) semantic_error("You can't specify local" " variable for kretprobe"); + session.need_dwarf = 1; + } + debug("%d arguments\n", pp->nr_args); return 0; } +#ifndef NO_LIBDWARF static int open_default_vmlinux(void) { struct utsname uts; @@ -209,6 +216,7 @@ static int open_default_vmlinux(void) } return fd; } +#endif static const char * const probe_usage[] = { "perf probe [] -P 'PROBEDEF' [-P 'PROBEDEF' ...]", @@ -216,10 +224,16 @@ static const char * const probe_usage[] = { }; static const struct option options[] = { +#ifndef NO_LIBDWARF OPT_STRING('k', "vmlinux", &session.vmlinux, "file", "vmlinux/module pathname"), +#endif OPT_CALLBACK('P', "probe", NULL, +#ifdef NO_LIBDWARF + "p|r:[GRP/]NAME FUNC[+OFFS] [ARG ...]", +#else "p|r:[GRP/]NAME FUNC[+OFFS][@SRC]|@SRC:LINE [ARG ...]", +#endif "probe point definition, where\n" "\t\tp:\tkprobe probe\n" "\t\tr:\tkretprobe probe\n" @@ -227,9 +241,13 @@ static const struct option options[] = { "\t\tNAME:\tEvent name\n" "\t\tFUNC:\tFunction name\n" "\t\tOFFS:\tOffset from function entry (in byte)\n" +#ifdef NO_LIBDWARF + "\t\tARG:\tProbe argument (only \n" +#else "\t\tSRC:\tSource code path\n" "\t\tLINE:\tLine number\n" "\t\tARG:\tProbe argument (local variable name or\n" +#endif "\t\t\tkprobe-tracer argument format is supported.)\n", parse_probepoint), OPT_END() @@ -279,7 +297,7 @@ error: int cmd_probe(int argc, const char **argv, const char *prefix __used) { - int i, j, fd, ret, need_dwarf = 0; + int i, j, fd, ret; struct probe_point *pp; char buf[MAX_CMDLEN]; @@ -288,12 +306,19 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) if (argc || session.nr_probe == 0) usage_with_options(probe_usage, options); - /* Synthesize return probes */ +#ifdef NO_LIBDWARF + if (session.need_dwarf) + semantic_error("Dwarf-analysis is not supported"); +#endif + + /* Synthesize probes without dwarf */ for (j = 0; j < session.nr_probe; j++) { +#ifndef NO_LIBDWARF if (session.events[j][0] != 'r') { - need_dwarf = 1; + session.need_dwarf = 1; continue; } +#endif ret = synthesize_probepoint(&session.probes[j]); if (ret == -E2BIG) semantic_error("probe point is too long."); @@ -303,7 +328,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) } } - if (!need_dwarf) +#ifndef NO_LIBDWARF + if (!session.need_dwarf) goto setup_probes; if (session.vmlinux) @@ -332,6 +358,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) close(fd); setup_probes: +#endif /* !NO_LIBDWARF */ + /* Settng up probe points */ snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); fd = open(buf, O_WRONLY, O_APPEND); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index f598120..c570d17 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -295,9 +295,7 @@ static void handle_internal_command(int argc, const char **argv) { "version", cmd_version, 0 }, { "trace", cmd_trace, 0 }, { "sched", cmd_sched, 0 }, -#ifdef SUPPORT_DWARF { "probe", cmd_probe, 0 }, -#endif }; unsigned int i; static const char ext[] = STRIP_EXTENSION; diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index af920de..306810c 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -44,6 +44,7 @@ struct probe_point { char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ }; +#ifndef NO_LIBDWARF extern int find_probepoint(int fd, struct probe_point *pp); #include @@ -64,5 +65,6 @@ struct probe_finder { char *buf; /* Current output buffer */ int len; /* Length of output buffer */ }; +#endif /* NO_LIBDWARF */ #endif /*_PROBE_FINDER_H */ -- cgit v0.10.2 From 7a693d3f0d10f978ebdf3082c41404ab97106567 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 13 Oct 2009 08:16:30 +0200 Subject: perf_events, x86: Fix event constraints code There was namespace overlap due to a rename i did - this caused the following build warning, reported by Stephen Rothwell against linux-next x86_64 allmodconfig: arch/x86/kernel/cpu/perf_event.c: In function 'intel_get_event_idx': arch/x86/kernel/cpu/perf_event.c:1445: warning: 'event_constraint' is used uninitialized in this function This is a real bug not just a warning: fix it by renaming the global event-constraints table pointer to 'event_constraints'. Reported-by: Stephen Rothwell Cc: Stephane Eranian Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091013144223.369d616d.sfr@canb.auug.org.au> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9961d845..2e20bca 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -124,7 +124,7 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -static const struct event_constraint *event_constraint; +static const struct event_constraint *event_constraints; /* * Not sure about some of these @@ -1442,12 +1442,12 @@ intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) const struct event_constraint *event_constraint; int i, code; - if (!event_constraint) + if (!event_constraints) goto skip; code = hwc->config & CORE_EVNTSEL_EVENT_MASK; - for_each_event_constraint(event_constraint, event_constraint) { + for_each_event_constraint(event_constraint, event_constraints) { if (code == event_constraint->code) { for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { if (!test_and_set_bit(i, cpuc->used_mask)) @@ -2047,12 +2047,12 @@ static int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ - event_constraint = intel_p6_event_constraints; + event_constraints = intel_p6_event_constraints; break; case 9: case 13: /* Pentium M */ - event_constraint = intel_p6_event_constraints; + event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -2124,14 +2124,14 @@ static int intel_pmu_init(void) sizeof(hw_cache_event_ids)); pr_cont("Core2 events, "); - event_constraint = intel_core_event_constraints; + event_constraints = intel_core_event_constraints; break; default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - event_constraint = intel_nehalem_event_constraints; + event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: -- cgit v0.10.2 From aef6f81b55f462082699c06e8e67e6eb5630ed45 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 12 Oct 2009 22:23:24 +0200 Subject: tracing: Rename set_ftrace to set_bootup_ftrace Do this rename because set_ftrace is too much generic and not enough self-explainable as a name. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4506826..866daf8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -129,7 +129,7 @@ static int tracing_set_tracer(const char *buf); static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; static char *default_bootup_tracer; -static int __init set_ftrace(char *str) +static int __init set_bootup_ftrace(char *str) { strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; @@ -137,7 +137,7 @@ static int __init set_ftrace(char *str) ring_buffer_expanded = 1; return 1; } -__setup("ftrace=", set_ftrace); +__setup("ftrace=", set_bootup_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { -- cgit v0.10.2 From bf7c5b43a12614847b83f507fb169ad30640e406 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 12 Oct 2009 22:31:32 +0200 Subject: tracing: Remove unused ftrace_trace_addr helper Remove the ftrace_trace_addr() function as only its off-case is implemented and there are no users of it currently. But we keep ftrace_graph_addr() off-case, in case someone come to use the function graph tracer to profit from top-level callers filtering. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Li Zefan diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 365fb19..f22a7ac 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -483,10 +483,6 @@ static inline int ftrace_graph_addr(unsigned long addr) return 0; } #else -static inline int ftrace_trace_addr(unsigned long addr) -{ - return 1; -} static inline int ftrace_graph_addr(unsigned long addr) { return 1; -- cgit v0.10.2 From 8968f9d3dc23d9a1821d97c6f11e72a59382e56c Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Tue, 13 Oct 2009 16:19:41 +0900 Subject: perf_event, x86, mce: Use TRACE_EVENT() for MCE logging This approach is the first baby step towards solving many of the structural problems the x86 MCE logging code is having today: - It has a private ring-buffer implementation that has a number of limitations and has been historically fragile and buggy. - It is using a quirky /dev/mcelog ioctl driven ABI that is MCE specific. /dev/mcelog is not part of any larger logging framework and hence has remained on the fringes for many years. - The MCE logging code is still very unclean partly due to its ABI limitations. Fields are being reused for multiple purposes, and the whole message structure is limited and x86 specific to begin with. All in one, the x86 tree would like to move away from this private implementation of an event logging facility to a broader framework. By using perf events we gain the following advantages: - Multiple user-space agents can access MCE events. We can have an mcelog daemon running but also a system-wide tracer capturing important events in flight-recorder mode. - Sampling support: the kernel and the user-space call-chain of MCE events can be stored and analyzed as well. This way actual patterns of bad behavior can be matched to precisely what kind of activity happened in the kernel (and/or in the app) around that moment in time. - Coupling with other hardware and software events: the PMU can track a number of other anomalies - monitoring software might chose to monitor those plus the MCE events as well - in one coherent stream of events. - Discovery of MCE sources - tracepoints are enumerated and tools can act upon the existence (or non-existence) of various channels of MCE information. - Filtering support: we just subscribe to and act upon the events we are interested in. Then even on a per event source basis there's in-kernel filter expressions available that can restrict the amount of data that hits the event channel. - Arbitrary deep per cpu buffering of events - we can buffer 32 entries or we can buffer as much as we want, as long as we have the RAM. - An NMI-safe ring-buffer implementation - mappable to user-space. - Built-in support for timestamping of events, PID markers, CPU markers, etc. - A rich ABI accessible over system call interface. Per cpu, per task and per workload monitoring of MCE events can be done this way. The ABI itself has a nice, meaningful structure. - Extensible ABI: new fields can be added without breaking tooling. New tracepoints can be added as the hardware side evolves. There's various parsers that can be used. - Lots of scheduling/buffering/batching modes of operandi for MCE events. poll() support. mmap() support. read() support. You name it. - Rich tooling support: even without any MCE specific extensions added the 'perf' tool today offers various views of MCE data: perf report, perf stat, perf trace can all be used to view logged MCE events and perhaps correlate them to certain user-space usage patterns. But it can be used directly as well, for user-space agents and policy action in mcelog, etc. With this we hope to achieve significant code cleanup and feature improvements in the MCE code, and we hope to be able to drop the /dev/mcelog facility in the end. This patch is just a plain dumb dump of mce_log() records to the tracepoints / perf events framework - a first proof of concept step. Signed-off-by: Hidetoshi Seto Cc: Huang Ying Cc: Andi Kleen LKML-Reference: <4AD42A0D.7050104@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9..39caea3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -46,6 +46,9 @@ #include "mce-internal.h" +#define CREATE_TRACE_POINTS +#include + int mce_disabled __read_mostly; #define MISC_MCELOG_MINOR 227 @@ -141,6 +144,9 @@ void mce_log(struct mce *mce) { unsigned next, entry; + /* Emit the trace record: */ + trace_mce_record(mce); + mce->finished = 0; wmb(); for (;;) { diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h new file mode 100644 index 0000000..7eee778 --- /dev/null +++ b/include/trace/events/mce.h @@ -0,0 +1,69 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mce + +#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MCE_H + +#include +#include +#include + +TRACE_EVENT(mce_record, + + TP_PROTO(struct mce *m), + + TP_ARGS(m), + + TP_STRUCT__entry( + __field( u64, mcgcap ) + __field( u64, mcgstatus ) + __field( u8, bank ) + __field( u64, status ) + __field( u64, addr ) + __field( u64, misc ) + __field( u64, ip ) + __field( u8, cs ) + __field( u64, tsc ) + __field( u64, walltime ) + __field( u32, cpu ) + __field( u32, cpuid ) + __field( u32, apicid ) + __field( u32, socketid ) + __field( u8, cpuvendor ) + ), + + TP_fast_assign( + __entry->mcgcap = m->mcgcap; + __entry->mcgstatus = m->mcgstatus; + __entry->bank = m->bank; + __entry->status = m->status; + __entry->addr = m->addr; + __entry->misc = m->misc; + __entry->ip = m->ip; + __entry->cs = m->cs; + __entry->tsc = m->tsc; + __entry->walltime = m->time; + __entry->cpu = m->extcpu; + __entry->cpuid = m->cpuid; + __entry->apicid = m->apicid; + __entry->socketid = m->socketid; + __entry->cpuvendor = m->cpuvendor; + ), + + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + __entry->cpu, + __entry->mcgcap, __entry->mcgstatus, + __entry->bank, __entry->status, + __entry->addr, __entry->misc, + __entry->cs, __entry->ip, + __entry->tsc, + __entry->cpuvendor, __entry->cpuid, + __entry->walltime, + __entry->socketid, + __entry->apicid) +); + +#endif /* _TRACE_MCE_H */ + +/* This part must be outside protection */ +#include -- cgit v0.10.2 From cfed95a693e1ea5d08b9c9019bc30e448437ee2f Mon Sep 17 00:00:00 2001 From: Vincent Legoll Date: Tue, 13 Oct 2009 10:18:16 +0200 Subject: perf tools: Do not manually count string lengths Use strlen & macros instead of manually counting string lengths as this is error prone and may lend to bugs. Signed-off-by: Vincent Legoll Cc: Linus Torvalds LKML-Reference: <4727185d0910130118m5387058dndb02ac9b384af9f0@mail.gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 19fc7fe..624e62d 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -89,8 +89,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) /* * Check remaining flags. */ - if (!prefixcmp(cmd, "--exec-path")) { - cmd += 11; + if (!prefixcmp(cmd, CMD_EXEC_PATH)) { + cmd += strlen(CMD_EXEC_PATH); if (*cmd == '=') perf_set_argv_exec_path(cmd + 1); else { @@ -117,8 +117,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) (*argv)++; (*argc)--; handled++; - } else if (!prefixcmp(cmd, "--perf-dir=")) { - setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1); + } else if (!prefixcmp(cmd, CMD_PERF_DIR)) { + setenv(PERF_DIR_ENVIRONMENT, cmd + strlen(CMD_PERF_DIR), 1); if (envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--work-tree")) { @@ -131,8 +131,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) *envchanged = 1; (*argv)++; (*argc)--; - } else if (!prefixcmp(cmd, "--work-tree=")) { - setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); + } else if (!prefixcmp(cmd, CMD_WORK_TREE)) { + setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + strlen(CMD_WORK_TREE), 1); if (envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--debugfs-dir")) { @@ -146,8 +146,8 @@ static int handle_options(const char*** argv, int* argc, int* envchanged) *envchanged = 1; (*argv)++; (*argc)--; - } else if (!prefixcmp(cmd, "--debugfs-dir=")) { - strncpy(debugfs_mntpt, cmd + 14, MAXPATHLEN); + } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { + strncpy(debugfs_mntpt, cmd + strlen(CMD_DEBUGFS_DIR), MAXPATHLEN); debugfs_mntpt[MAXPATHLEN - 1] = '\0'; if (envchanged) *envchanged = 1; diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index f26172c..918eb37 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -5,6 +5,11 @@ #include "strbuf.h" #include "../perf.h" +#define CMD_EXEC_PATH "--exec-path" +#define CMD_PERF_DIR "--perf-dir=" +#define CMD_WORK_TREE "--work-tree=" +#define CMD_DEBUGFS_DIR "--debugfs-dir=" + #define PERF_DIR_ENVIRONMENT "PERF_DIR" #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" -- cgit v0.10.2 From f4f0b418188cc7995375acbb54e87c80f21861bd Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 13 Oct 2009 14:57:20 +0200 Subject: perf tools: Remove expensive old debug code from perf top Calling gettimeofday() at high frequency is painful for handicapped boxen. The spot calling gettimeofday() is old unneeded debug code, so remove it. Reported-by: Ingo Molnar Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: Peter Zijlstra LKML-Reference: <1255438640.7173.1.camel@marge.simson.net> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c0f69e8..2d8806b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -870,8 +870,6 @@ static unsigned int mmap_read_head(struct mmap_data *md) return head; } -struct timeval last_read, this_read; - static void mmap_read_counter(struct mmap_data *md) { unsigned int head = mmap_read_head(md); @@ -879,8 +877,6 @@ static void mmap_read_counter(struct mmap_data *md) unsigned char *data = md->base + page_size; int diff; - gettimeofday(&this_read, NULL); - /* * If we're further behind than half the buffer, there's a chance * the writer will bite our tail and mess up the samples under us. @@ -891,14 +887,7 @@ static void mmap_read_counter(struct mmap_data *md) */ diff = head - old; if (diff > md->mask / 2 || diff < 0) { - struct timeval iv; - unsigned long msecs; - - timersub(&this_read, &last_read, &iv); - msecs = iv.tv_sec*1000 + iv.tv_usec/1000; - - fprintf(stderr, "WARNING: failed to keep up with mmap data." - " Last read %lu msecs ago.\n", msecs); + fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); /* * head points to a known good entry, start there. @@ -906,8 +895,6 @@ static void mmap_read_counter(struct mmap_data *md) old = head; } - last_read = this_read; - for (; old != head;) { event_t *event = (event_t *)&data[old & md->mask]; -- cgit v0.10.2 From d5b889f2ecec7849e851ddd31c34bdfb3482b5de Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 13 Oct 2009 11:16:29 -0300 Subject: perf tools: Move threads & last_match to threads.c This was just being copy'n'pasted all over. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <20091013141629.GD21809@ghostprotocols.net> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 8c84320..3fe0de0 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -37,10 +37,6 @@ static int print_line; static unsigned long page_size; static unsigned long mmap_window = 32; -static struct rb_root threads; -static struct thread *last_match; - - struct sym_ext { struct rb_node node; double percent; @@ -96,12 +92,10 @@ static int process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; - struct thread *thread; u64 ip = event->ip.ip; struct map *map = NULL; struct symbol *sym = NULL; - - thread = threads__findnew(event->ip.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->ip.pid); dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", (void *)(offset + head), @@ -166,10 +160,8 @@ got_map: static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; struct map *map = map__new(&event->mmap, NULL, 0); - - thread = threads__findnew(event->mmap.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), @@ -194,9 +186,8 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; + struct thread *thread = threads__findnew(event->comm.pid); - thread = threads__findnew(event->comm.pid, &threads, &last_match); dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), @@ -215,11 +206,9 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) static int process_fork_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; - struct thread *parent; + struct thread *thread = threads__findnew(event->fork.pid); + struct thread *parent = threads__findnew(event->fork.ppid); - thread = threads__findnew(event->fork.pid, &threads, &last_match); - parent = threads__findnew(event->fork.ppid, &threads, &last_match); dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), @@ -558,7 +547,7 @@ static int __cmd_annotate(void) uint32_t size; char *buf; - register_idle_thread(&threads, &last_match); + register_idle_thread(); input = open(input_name, O_RDONLY); if (input < 0) { @@ -659,7 +648,7 @@ more: return 0; if (verbose > 3) - threads__fprintf(stdout, &threads); + threads__fprintf(stdout); if (verbose > 2) dsos__fprintf(stdout); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f57a23b..015c797 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -55,9 +55,6 @@ static char callchain_default_opt[] = "fractal,0.5"; static char *cwd; static int cwdlen; -static struct rb_root threads; -static struct thread *last_match; - static struct perf_header *header; static u64 sample_type; @@ -593,15 +590,13 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; struct symbol *sym = NULL; - struct thread *thread; u64 ip = event->ip.ip; u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; int cpumode; - - thread = threads__findnew(event->ip.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->ip.pid); if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; @@ -685,10 +680,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; struct map *map = map__new(&event->mmap, cwd, cwdlen); - - thread = threads__findnew(event->mmap.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), @@ -714,9 +707,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; - - thread = threads__findnew(event->comm.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->comm.pid); dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), @@ -736,11 +727,8 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) static int process_task_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; - struct thread *parent; - - thread = threads__findnew(event->fork.pid, &threads, &last_match); - parent = threads__findnew(event->fork.ppid, &threads, &last_match); + struct thread *thread = threads__findnew(event->fork.pid); + struct thread *parent = threads__findnew(event->fork.ppid); dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n", (void *)(offset + head), @@ -857,7 +845,7 @@ static int __cmd_report(void) struct thread *idle; int ret; - idle = register_idle_thread(&threads, &last_match); + idle = register_idle_thread(); thread__comm_adjust(idle); if (show_threads) @@ -881,7 +869,7 @@ static int __cmd_report(void) return 0; if (verbose > 3) - threads__fprintf(stdout, &threads); + threads__fprintf(stdout); if (verbose > 2) dsos__fprintf(stdout); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 387a442..73bdad0 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -24,9 +24,6 @@ static char const *input_name = "perf.data"; static unsigned long total_comm = 0; -static struct rb_root threads; -static struct thread *last_match; - static struct perf_header *header; static u64 sample_type; @@ -641,9 +638,7 @@ static void test_calibrations(void) static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; - - thread = threads__findnew(event->comm.tid, &threads, &last_match); + struct thread *thread = threads__findnew(event->comm.tid); dump_printf("%p [%p]: perf_event_comm: %s:%d\n", (void *)(offset + head), @@ -1086,8 +1081,8 @@ latency_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %Ld < 0 ?\n", delta); - sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); - sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); + sched_out = threads__findnew(switch_event->prev_pid); + sched_in = threads__findnew(switch_event->next_pid); out_events = thread_atoms_search(&atom_root, sched_out, &cmp_pid); if (!out_events) { @@ -1120,13 +1115,10 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, u64 timestamp, struct thread *this_thread __used) { - struct work_atoms *atoms; - struct thread *thread; + struct thread *thread = threads__findnew(runtime_event->pid); + struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); BUG_ON(cpu >= MAX_CPUS || cpu < 0); - - thread = threads__findnew(runtime_event->pid, &threads, &last_match); - atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); if (!atoms) { thread_atoms_insert(thread); atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); @@ -1153,7 +1145,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, if (!wakeup_event->success) return; - wakee = threads__findnew(wakeup_event->pid, &threads, &last_match); + wakee = threads__findnew(wakeup_event->pid); atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid); if (!atoms) { thread_atoms_insert(wakee); @@ -1202,7 +1194,7 @@ latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, if (profile_cpu == -1) return; - migrant = threads__findnew(migrate_task_event->pid, &threads, &last_match); + migrant = threads__findnew(migrate_task_event->pid); atoms = thread_atoms_search(&atom_root, migrant, &cmp_pid); if (!atoms) { thread_atoms_insert(migrant); @@ -1458,8 +1450,8 @@ map_switch_event(struct trace_switch_event *switch_event, die("hm, delta: %Ld < 0 ?\n", delta); - sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match); - sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match); + sched_out = threads__findnew(switch_event->prev_pid); + sched_in = threads__findnew(switch_event->next_pid); curr_thread[this_cpu] = sched_in; @@ -1649,7 +1641,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (!(sample_type & PERF_SAMPLE_RAW)) return 0; - thread = threads__findnew(event->ip.pid, &threads, &last_match); + thread = threads__findnew(event->ip.pid); if (sample_type & PERF_SAMPLE_TIME) { timestamp = *(u64 *)more_data; @@ -1725,7 +1717,7 @@ static struct perf_file_handler file_handler = { static int read_events(void) { - register_idle_thread(&threads, &last_match); + register_idle_thread(); register_perf_file_handler(&file_handler); return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index fb3f3c2..ccf867d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -19,9 +19,6 @@ static char const *input_name = "perf.data"; static unsigned long total = 0; static unsigned long total_comm = 0; -static struct rb_root threads; -static struct thread *last_match; - static struct perf_header *header; static u64 sample_type; @@ -32,9 +29,7 @@ static int cwdlen; static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; - - thread = threads__findnew(event->comm.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->comm.pid); dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), @@ -54,14 +49,12 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) static int process_sample_event(event_t *event, unsigned long offset, unsigned long head) { - struct thread *thread; u64 ip = event->ip.ip; u64 timestamp = -1; u32 cpu = -1; u64 period = 1; void *more_data = event->ip.__more_data; - - thread = threads__findnew(event->ip.pid, &threads, &last_match); + struct thread *thread = threads__findnew(event->ip.pid); if (sample_type & PERF_SAMPLE_TIME) { timestamp = *(u64 *)more_data; @@ -135,7 +128,7 @@ static struct perf_file_handler file_handler = { static int __cmd_trace(void) { - register_idle_thread(&threads, &last_match); + register_idle_thread(); register_perf_file_handler(&file_handler); return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 3b56aeb..f53fad7 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -6,6 +6,9 @@ #include "util.h" #include "debug.h" +static struct rb_root threads; +static struct thread *last_match; + static struct thread *thread__new(pid_t pid) { struct thread *self = calloc(1, sizeof(*self)); @@ -50,10 +53,9 @@ static size_t thread__fprintf(struct thread *self, FILE *fp) return ret; } -struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) +struct thread *threads__findnew(pid_t pid) { - struct rb_node **p = &threads->rb_node; + struct rb_node **p = &threads.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -62,15 +64,15 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) * so most of the time we dont have to look up * the full rbtree: */ - if (*last_match && (*last_match)->pid == pid) - return *last_match; + if (last_match && last_match->pid == pid) + return last_match; while (*p != NULL) { parent = *p; th = rb_entry(parent, struct thread, rb_node); if (th->pid == pid) { - *last_match = th; + last_match = th; return th; } @@ -83,17 +85,16 @@ threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match) th = thread__new(pid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, threads); - *last_match = th; + rb_insert_color(&th->rb_node, &threads); + last_match = th; } return th; } -struct thread * -register_idle_thread(struct rb_root *threads, struct thread **last_match) +struct thread *register_idle_thread(void) { - struct thread *thread = threads__findnew(0, threads, last_match); + struct thread *thread = threads__findnew(0); if (!thread || thread__set_comm(thread, "swapper")) { fprintf(stderr, "problem inserting idle task.\n"); @@ -197,12 +198,12 @@ int thread__fork(struct thread *self, struct thread *parent) return 0; } -size_t threads__fprintf(FILE *fp, struct rb_root *threads) +size_t threads__fprintf(FILE *fp) { size_t ret = 0; struct rb_node *nd; - for (nd = rb_first(threads); nd; nd = rb_next(nd)) { + for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); ret += thread__fprintf(pos, fp); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 845d9b6..1abef3b 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -15,13 +15,11 @@ struct thread { }; int thread__set_comm(struct thread *self, const char *comm); -struct thread * -threads__findnew(pid_t pid, struct rb_root *threads, struct thread **last_match); -struct thread * -register_idle_thread(struct rb_root *threads, struct thread **last_match); +struct thread *threads__findnew(pid_t pid); +struct thread *register_idle_thread(void); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); -size_t threads__fprintf(FILE *fp, struct rb_root *threads); +size_t threads__fprintf(FILE *fp); void maps__insert(struct rb_root *maps, struct map *map); struct map *maps__find(struct rb_root *maps, u64 ip); -- cgit v0.10.2 From 825332e4ff1373c55d931b49408df7ec2298f71e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 14 Oct 2009 08:17:36 +1100 Subject: capabilities: simplify bound checks for copy_from_user() The capabilities syscall has a copy_from_user() call where gcc currently cannot prove to itself that the copy is always within bounds. This patch adds a very explicity bound check to prove to gcc that this copy_from_user cannot overflow its destination buffer. Signed-off-by: Arjan van de Ven Acked-by: James Morris Signed-off-by: Andrew Morton Signed-off-by: James Morris diff --git a/kernel/capability.c b/kernel/capability.c index 4e17041..c2316d3 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -238,7 +238,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) { struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; - unsigned i, tocopy; + unsigned i, tocopy, copybytes; kernel_cap_t inheritable, permitted, effective; struct cred *new; int ret; @@ -255,8 +255,11 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) if (pid != 0 && pid != task_pid_vnr(current)) return -EPERM; - if (copy_from_user(&kdata, data, - tocopy * sizeof(struct __user_cap_data_struct))) + copybytes = tocopy * sizeof(struct __user_cap_data_struct); + if (copybytes > sizeof(kdata)) + return -EFAULT; + + if (copy_from_user(&kdata, data, copybytes)) return -EFAULT; for (i = 0; i < tocopy; i++) { -- cgit v0.10.2 From 194ec34184869f0de1cf255c924fc5299e1b3d27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 13 Oct 2009 16:33:50 -0400 Subject: function-graph/x86: Replace unbalanced ret with jmp The function graph tracer replaces the return address with a hook to trace the exit of the function call. This hook will finish by returning to the real location the function should return to. But the current implementation uses a ret to jump to the real return location. This causes a imbalance between calls and ret. That is the original function does a call, the ret goes to the handler and then the handler does a ret without a matching call. Although the function graph tracer itself still breaks the branch predictor by replacing the original ret, by using a second ret and causing an imbalance, it breaks the predictor even more. This patch replaces the ret with a jmp to keep the calls and ret balanced. I tested this on one box and it showed a 1.7% increase in performance. Another box only showed a small 0.3% increase. But no box that I tested this on showed a decrease in performance by making this change. Signed-off-by: Steven Rostedt Acked-by: Mathieu Desnoyers Cc: Frederic Weisbecker LKML-Reference: <20091013203425.042034383@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d..7d52e9d 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1185,17 +1185,14 @@ END(ftrace_graph_caller) .globl return_to_handler return_to_handler: - pushl $0 pushl %eax - pushl %ecx pushl %edx movl %ebp, %eax call ftrace_return_to_handler - movl %eax, 0xc(%esp) + movl %eax, %ecx popl %edx - popl %ecx popl %eax - ret + jmp *%ecx #endif .section .rodata,"a" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b5c061f..bd5bbdd 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -155,11 +155,11 @@ GLOBAL(return_to_handler) call ftrace_return_to_handler - movq %rax, 16(%rsp) + movq %rax, %rdi movq 8(%rsp), %rdx movq (%rsp), %rax - addq $16, %rsp - retq + addq $24, %rsp + jmp *%rdi #endif -- cgit v0.10.2 From 756d17ee7ee4fbc8238bdf97100af63e6ac441ef Mon Sep 17 00:00:00 2001 From: "jolsa@redhat.com" Date: Tue, 13 Oct 2009 16:33:52 -0400 Subject: tracing: Support multiple pids in set_pid_ftrace file Adding the possibility to set more than 1 pid in the set_pid_ftrace file, thus allowing to trace more than 1 independent processes. Usage: sh-4.0# echo 284 > ./set_ftrace_pid sh-4.0# cat ./set_ftrace_pid 284 sh-4.0# echo 1 >> ./set_ftrace_pid sh-4.0# echo 0 >> ./set_ftrace_pid sh-4.0# cat ./set_ftrace_pid swapper tasks 1 284 sh-4.0# echo 4 > ./set_ftrace_pid sh-4.0# cat ./set_ftrace_pid 4 sh-4.0# echo > ./set_ftrace_pid sh-4.0# cat ./set_ftrace_pid no pid sh-4.0# Signed-off-by: Jiri Olsa Cc: Frederic Weisbecker LKML-Reference: <20091013203425.565454612@goodmis.org> Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 45c9659..0c799d1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -60,6 +60,13 @@ static int last_ftrace_enabled; /* Quick disabling of function tracer. */ int function_trace_stop; +/* List for set_ftrace_pid's pids. */ +LIST_HEAD(ftrace_pids); +struct ftrace_pid { + struct list_head list; + struct pid *pid; +}; + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -159,7 +166,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops) else func = ftrace_list_func; - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } @@ -207,7 +214,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_list->next == &ftrace_list_end) { ftrace_func_t func = ftrace_list->func; - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } @@ -235,7 +242,7 @@ static void ftrace_update_pid_func(void) func = __ftrace_trace_function; #endif - if (ftrace_pid_trace) { + if (!list_empty(&ftrace_pids)) { set_ftrace_pid_function(func); func = ftrace_pid_func; } else { @@ -825,8 +832,6 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer) } #endif /* CONFIG_FUNCTION_PROFILER */ -/* set when tracing only a pid */ -struct pid *ftrace_pid_trace; static struct pid * const ftrace_swapper_pid = &init_struct_pid; #ifdef CONFIG_DYNAMIC_FTRACE @@ -2758,23 +2763,6 @@ static inline void ftrace_startup_enable(int command) { } # define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ -static ssize_t -ftrace_pid_read(struct file *file, char __user *ubuf, - size_t cnt, loff_t *ppos) -{ - char buf[64]; - int r; - - if (ftrace_pid_trace == ftrace_swapper_pid) - r = sprintf(buf, "swapper tasks\n"); - else if (ftrace_pid_trace) - r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace)); - else - r = sprintf(buf, "no pid\n"); - - return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -} - static void clear_ftrace_swapper(void) { struct task_struct *p; @@ -2825,14 +2813,12 @@ static void set_ftrace_pid(struct pid *pid) rcu_read_unlock(); } -static void clear_ftrace_pid_task(struct pid **pid) +static void clear_ftrace_pid_task(struct pid *pid) { - if (*pid == ftrace_swapper_pid) + if (pid == ftrace_swapper_pid) clear_ftrace_swapper(); else - clear_ftrace_pid(*pid); - - *pid = NULL; + clear_ftrace_pid(pid); } static void set_ftrace_pid_task(struct pid *pid) @@ -2843,11 +2829,140 @@ static void set_ftrace_pid_task(struct pid *pid) set_ftrace_pid(pid); } +static int ftrace_pid_add(int p) +{ + struct pid *pid; + struct ftrace_pid *fpid; + int ret = -EINVAL; + + mutex_lock(&ftrace_lock); + + if (!p) + pid = ftrace_swapper_pid; + else + pid = find_get_pid(p); + + if (!pid) + goto out; + + ret = 0; + + list_for_each_entry(fpid, &ftrace_pids, list) + if (fpid->pid == pid) + goto out_put; + + ret = -ENOMEM; + + fpid = kmalloc(sizeof(*fpid), GFP_KERNEL); + if (!fpid) + goto out_put; + + list_add(&fpid->list, &ftrace_pids); + fpid->pid = pid; + + set_ftrace_pid_task(pid); + + ftrace_update_pid_func(); + ftrace_startup_enable(0); + + mutex_unlock(&ftrace_lock); + return 0; + +out_put: + if (pid != ftrace_swapper_pid) + put_pid(pid); + +out: + mutex_unlock(&ftrace_lock); + return ret; +} + +static void ftrace_pid_reset(void) +{ + struct ftrace_pid *fpid, *safe; + + mutex_lock(&ftrace_lock); + list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) { + struct pid *pid = fpid->pid; + + clear_ftrace_pid_task(pid); + + list_del(&fpid->list); + kfree(fpid); + } + + ftrace_update_pid_func(); + ftrace_startup_enable(0); + + mutex_unlock(&ftrace_lock); +} + +static void *fpid_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&ftrace_lock); + + if (list_empty(&ftrace_pids) && (!*pos)) + return (void *) 1; + + return seq_list_start(&ftrace_pids, *pos); +} + +static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) +{ + if (v == (void *)1) + return NULL; + + return seq_list_next(v, &ftrace_pids, pos); +} + +static void fpid_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&ftrace_lock); +} + +static int fpid_show(struct seq_file *m, void *v) +{ + const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list); + + if (v == (void *)1) { + seq_printf(m, "no pid\n"); + return 0; + } + + if (fpid->pid == ftrace_swapper_pid) + seq_printf(m, "swapper tasks\n"); + else + seq_printf(m, "%u\n", pid_vnr(fpid->pid)); + + return 0; +} + +static const struct seq_operations ftrace_pid_sops = { + .start = fpid_start, + .next = fpid_next, + .stop = fpid_stop, + .show = fpid_show, +}; + +static int +ftrace_pid_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + if ((file->f_mode & FMODE_WRITE) && + (file->f_flags & O_TRUNC)) + ftrace_pid_reset(); + + if (file->f_mode & FMODE_READ) + ret = seq_open(file, &ftrace_pid_sops); + + return ret; +} + static ssize_t ftrace_pid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - struct pid *pid; char buf[64]; long val; int ret; @@ -2860,57 +2975,38 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, buf[cnt] = 0; + /* + * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid" + * to clean the filter quietly. + */ + strstrip(buf); + if (strlen(buf) == 0) + return 1; + ret = strict_strtol(buf, 10, &val); if (ret < 0) return ret; - mutex_lock(&ftrace_lock); - if (val < 0) { - /* disable pid tracing */ - if (!ftrace_pid_trace) - goto out; - - clear_ftrace_pid_task(&ftrace_pid_trace); - - } else { - /* swapper task is special */ - if (!val) { - pid = ftrace_swapper_pid; - if (pid == ftrace_pid_trace) - goto out; - } else { - pid = find_get_pid(val); - - if (pid == ftrace_pid_trace) { - put_pid(pid); - goto out; - } - } - - if (ftrace_pid_trace) - clear_ftrace_pid_task(&ftrace_pid_trace); - - if (!pid) - goto out; - - ftrace_pid_trace = pid; - - set_ftrace_pid_task(ftrace_pid_trace); - } + ret = ftrace_pid_add(val); - /* update the function call */ - ftrace_update_pid_func(); - ftrace_startup_enable(0); + return ret ? ret : cnt; +} - out: - mutex_unlock(&ftrace_lock); +static int +ftrace_pid_release(struct inode *inode, struct file *file) +{ + if (file->f_mode & FMODE_READ) + seq_release(inode, file); - return cnt; + return 0; } static const struct file_operations ftrace_pid_fops = { - .read = ftrace_pid_read, - .write = ftrace_pid_write, + .open = ftrace_pid_open, + .write = ftrace_pid_write, + .read = seq_read, + .llseek = seq_lseek, + .release = ftrace_pid_release, }; static __init int ftrace_init_debugfs(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f22a7ac..acef8b4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -496,12 +496,12 @@ print_graph_function(struct trace_iterator *iter) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -extern struct pid *ftrace_pid_trace; +extern struct list_head ftrace_pids; #ifdef CONFIG_FUNCTION_TRACER static inline int ftrace_trace_task(struct task_struct *task) { - if (!ftrace_pid_trace) + if (list_empty(&ftrace_pids)) return 1; return test_tsk_trace_trace(task); -- cgit v0.10.2 From 5cb084bb1f3fd4dcdaf7e4cf564994346ec8f783 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2009 16:33:53 -0400 Subject: tracing: Enable records during the module load I was debuging some module using "function" and "function_graph" tracers and noticed, that if you load module after you enabled tracing, the module's hooks will convert only to NOP instructions. The attached patch enables modules' hooks if there's function trace allready on, thus allowing to trace module functions. Signed-off-by: Jiri Olsa Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt LKML-Reference: <20091013203425.896285120@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c799d1..aaea9cd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1270,12 +1270,34 @@ static int ftrace_update_code(struct module *mod) ftrace_new_addrs = p->newlist; p->flags = 0L; - /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(mod, p)) { - p->flags |= FTRACE_FL_CONVERTED; - ftrace_update_cnt++; - } else + /* + * Do the initial record convertion from mcount jump + * to the NOP instructions. + */ + if (!ftrace_code_disable(mod, p)) { ftrace_free_rec(p); + continue; + } + + p->flags |= FTRACE_FL_CONVERTED; + ftrace_update_cnt++; + + /* + * If the tracing is enabled, go ahead and enable the record. + * + * The reason not to enable the record immediatelly is the + * inherent check of ftrace_make_nop/ftrace_make_call for + * correct previous instructions. Making first the NOP + * conversion puts the module to the correct state, thus + * passing the ftrace_make_call check. + */ + if (ftrace_start_up) { + int failed = __ftrace_replace_code(p, 1); + if (failed) { + ftrace_bug(failed, p->ip); + ftrace_free_rec(p); + } + } } stop = ftrace_now(raw_smp_processor_id()); @@ -2609,7 +2631,7 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) return 0; } -static int ftrace_convert_nops(struct module *mod, +static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) { @@ -2669,7 +2691,7 @@ static void ftrace_init_module(struct module *mod, { if (ftrace_disabled || start == end) return; - ftrace_convert_nops(mod, start, end); + ftrace_process_locs(mod, start, end); } static int ftrace_module_notify(struct notifier_block *self, @@ -2730,7 +2752,7 @@ void __init ftrace_init(void) last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_convert_nops(NULL, + ret = ftrace_process_locs(NULL, __start_mcount_loc, __stop_mcount_loc); -- cgit v0.10.2 From 4d8289494a37e19cd7f3beacea9c957ad3debad6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 13 Oct 2009 16:33:54 -0400 Subject: tracing: Enable "__cold" functions Based on the commit: a586df06 "x86: Support __attribute__((__cold__)) in gcc 4.3" some of the functions goes to the ".text.unlikely" section. Looks like there's not many of them (I found printk, panic, __ssb_dma_not_implemented, fat_fs_error), but still worth to include I think. Signed-off-by: Jiri Olsa Cc: Frederic Weisbecker Signed-off-by: Steven Rostedt LKML-Reference: <20091013203426.175845614@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 090d300..bfb8b2c 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -119,6 +119,7 @@ my %text_sections = ( ".sched.text" => 1, ".spinlock.text" => 1, ".irqentry.text" => 1, + ".text.unlikely" => 1, ); $objdump = "objdump" if ((length $objdump) == 0); -- cgit v0.10.2 From 9844ab11c763bfed9f054c82366b19dcda66aca9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 00:07:03 +0400 Subject: x86, apic: Introduce the NOOP apic driver Introduce NOOP APIC driver. We should use it in case if apic was disabled due to hardware of software/firmware problems (including user requested to disable it case). The driver is attempting to catch any inappropriate apic operation call with warning issue. Also it is possible to use some apic operation like IPI calls, read/write without checking for apic presence which should make callers code easier. Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091013201022.534682104@openvz.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 474d80d..08a5f42 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -488,6 +488,8 @@ static inline unsigned int read_apic_id(void) extern void default_setup_apic_routing(void); +extern struct apic apic_noop; + #ifdef CONFIG_X86_32 extern struct apic apic_default; diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da7b7b9..565c1bf 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -2,7 +2,7 @@ # Makefile for local APIC drivers and for the IO-APIC code # -obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_SMP) += ipi.o diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c new file mode 100644 index 0000000..0b93ec2 --- /dev/null +++ b/arch/x86/kernel/apic/apic_noop.c @@ -0,0 +1,194 @@ +/* + * NOOP APIC driver. + * + * Does almost nothing and should be substituted by a real apic driver via + * probe routine. + * + * Though in case if apic is disabled (for some reason) we try + * to not uglify the caller's code and allow to call (some) apic routines + * like self-ipi, etc... and issue a warning if an operation is not allowed + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +/* + * some operations should never be reached with + * noop apic if it's not turned off, this mostly + * means the caller forgot to disable apic (or + * check the apic presence) before doing a call + */ +static void warn_apic_enabled(void) +{ + WARN_ONCE((cpu_has_apic || !disable_apic), + "APIC: Called for NOOP operation with apic enabled\n"); +} + +/* + * To check operations but do not bloat source code + */ +#define NOOP_FUNC(func) func { warn_apic_enabled(); } +#define NOOP_FUNC_RET(func, ret) func { warn_apic_enabled(); return ret; } + +NOOP_FUNC(static void noop_init_apic_ldr(void)) +NOOP_FUNC(static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector)) +NOOP_FUNC(static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)) +NOOP_FUNC(static void noop_send_IPI_allbutself(int vector)) +NOOP_FUNC(static void noop_send_IPI_all(int vector)) +NOOP_FUNC(static void noop_send_IPI_self(int vector)) +NOOP_FUNC_RET(static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip), -1) +NOOP_FUNC(static void noop_apic_write(u32 reg, u32 v)) +NOOP_FUNC(void noop_apic_wait_icr_idle(void)) +NOOP_FUNC_RET(static u32 noop_safe_apic_wait_icr_idle(void), 0) +NOOP_FUNC_RET(static u64 noop_apic_icr_read(void), 0) +NOOP_FUNC(static void noop_apic_icr_write(u32 low, u32 id)) +NOOP_FUNC_RET(static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map), phys_map) +NOOP_FUNC_RET(static int noop_cpu_to_logical_apicid(int cpu), 1) +NOOP_FUNC_RET(static int noop_default_phys_pkg_id(int cpuid_apic, int index_msb), 0) +NOOP_FUNC_RET(static unsigned int noop_get_apic_id(unsigned long x), 0) + +static int noop_probe(void) +{ + /* should not ever be enabled this way */ + return 0; +} + +static int noop_apic_id_registered(void) +{ + warn_apic_enabled(); + return physid_isset(read_apic_id(), phys_cpu_present_map); +} + +static const struct cpumask *noop_target_cpus(void) +{ + warn_apic_enabled(); + + /* only BSP here */ + return cpumask_of(0); +} + +static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid) +{ + warn_apic_enabled(); + return physid_isset(apicid, bitmap); +} + +static unsigned long noop_check_apicid_present(int bit) +{ + warn_apic_enabled(); + return physid_isset(bit, phys_cpu_present_map); +} + +static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) +{ + warn_apic_enabled(); + if (cpu != 0) + pr_warning("APIC: Vector allocated for non-BSP cpu\n"); + cpumask_clear(retmask); + cpumask_set_cpu(cpu, retmask); +} + +int noop_apicid_to_node(int logical_apicid) +{ + warn_apic_enabled(); + + /* we're always on node 0 */ + return 0; +} + +static u32 noop_apic_read(u32 reg) +{ + /* + * noop-read is always safe until we have + * non-disabled unit + */ + WARN_ON_ONCE((cpu_has_apic && !disable_apic)); + return 0; +} + +struct apic apic_noop = { + .name = "noop", + .probe = noop_probe, + .acpi_madt_oem_check = NULL, + + .apic_id_registered = noop_apic_id_registered, + + .irq_delivery_mode = dest_LowestPrio, + /* logical delivery broadcast to all CPUs: */ + .irq_dest_mode = 1, + + .target_cpus = noop_target_cpus, + .disable_esr = 0, + .dest_logical = APIC_DEST_LOGICAL, + .check_apicid_used = noop_check_apicid_used, + .check_apicid_present = noop_check_apicid_present, + + .vector_allocation_domain = noop_vector_allocation_domain, + .init_apic_ldr = noop_init_apic_ldr, + + .ioapic_phys_id_map = noop_ioapic_phys_id_map, + .setup_apic_routing = NULL, + .multi_timer_check = NULL, + .apicid_to_node = noop_apicid_to_node, + + .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, + .cpu_present_to_apicid = default_cpu_present_to_apicid, + .apicid_to_cpu_present = default_apicid_to_cpu_present, + + .setup_portio_remap = NULL, + .check_phys_apicid_present = default_check_phys_apicid_present, + .enable_apic_mode = NULL, + + .phys_pkg_id = noop_default_phys_pkg_id, + + .mps_oem_check = NULL, + + .get_apic_id = noop_get_apic_id, + .set_apic_id = NULL, + .apic_id_mask = 0x0F << 24, + + .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, + + .send_IPI_mask = noop_send_IPI_mask, + .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, + .send_IPI_allbutself = noop_send_IPI_allbutself, + .send_IPI_all = noop_send_IPI_all, + .send_IPI_self = noop_send_IPI_self, + + .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, + + /* should be safe */ + .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, + .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, + + .wait_for_init_deassert = NULL, + + .smp_callin_clear_local_apic = NULL, + .inquire_remote_apic = NULL, + + .read = noop_apic_read, + .write = noop_apic_write, + .icr_read = noop_apic_icr_read, + .icr_write = noop_apic_icr_write, + .wait_icr_idle = noop_apic_wait_icr_idle, + .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, +}; -- cgit v0.10.2 From a933c61829509eb27083146dda392132baa0969a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 00:07:04 +0400 Subject: x86, apic: Use apic noop driver In case if apic were disabled we may use the whole apic NOOP driver instead of sparse poking the some functions in apic driver. Also NOOP would catch any inappropriate apic operation calls (not just read/write). Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091013201022.747817361@openvz.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97..61a5628 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -241,28 +241,12 @@ static int modern_apic(void) } /* - * bare function to substitute write operation - * and it's _that_ fast :) - */ -static void native_apic_write_dummy(u32 reg, u32 v) -{ - WARN_ON_ONCE((cpu_has_apic || !disable_apic)); -} - -static u32 native_apic_read_dummy(u32 reg) -{ - WARN_ON_ONCE((cpu_has_apic && !disable_apic)); - return 0; -} - -/* - * right after this call apic->write/read doesn't do anything - * note that there is no restore operation it works one way + * right after this call apic become NOOP driven + * so apic->write/read doesn't do anything */ void apic_disable(void) { - apic->read = native_apic_read_dummy; - apic->write = native_apic_write_dummy; + apic = &apic_noop; } void native_apic_wait_icr_idle(void) -- cgit v0.10.2 From 2626eb2b2fd958dc0f683126aa84e93b939699a1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 00:07:05 +0400 Subject: x86, apic: Limit apic dumping, introduce new show_lapic= setup option In case if a system has a large number of cpus printing apics contents may consume a long time period. We limit such an output by 1 apic by default. But to have an ability to see all apics or some part of them we introduce "show_lapic" setup option which allow us to limit/unlimit the number of APICs being dumped. Example: apic=debug show_lapic=5, or apic=debug show_lapic=all Also move apic_verbosity checking upper that way so helper routines do not need to inspect it at all. Suggested-by: Yinghai Lu Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091013201022.926793122@openvz.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28..8c718c9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1599,9 +1599,6 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_desc *desc; unsigned int irq; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", @@ -1708,9 +1705,6 @@ __apicdebuginit(void) print_APIC_field(int base) { int i; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG); for (i = 0; i < 8; i++) @@ -1724,9 +1718,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) unsigned int i, v, ver, maxlvt; u64 icr; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); @@ -1824,13 +1815,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n"); } -__apicdebuginit(void) print_all_local_APICs(void) +__apicdebuginit(void) print_local_APICs(int maxcpu) { int cpu; + if (!maxcpu) + return; + preempt_disable(); - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (cpu >= maxcpu) + break; smp_call_function_single(cpu, print_local_APIC, NULL, 1); + } preempt_enable(); } @@ -1839,7 +1836,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) + if (!nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1866,21 +1863,41 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -__apicdebuginit(int) print_all_ICs(void) +static int __initdata show_lapic = 1; +static __init int setup_show_lapic(char *arg) +{ + int num = -1; + + if (strcmp(arg, "all") == 0) { + show_lapic = CONFIG_NR_CPUS; + } else { + get_option(&arg, &num); + if (num >= 0) + show_lapic = num; + } + + return 1; +} +__setup("show_lapic=", setup_show_lapic); + +__apicdebuginit(int) print_ICs(void) { + if (apic_verbosity == APIC_QUIET) + return 0; + print_PIC(); /* don't print out if apic is not there */ if (!cpu_has_apic && !apic_from_smp_config()) return 0; - print_all_local_APICs(); + print_local_APICs(show_lapic); print_IO_APIC(); return 0; } -fs_initcall(print_all_ICs); +fs_initcall(print_ICs); /* Where if anywhere is the i8259 connect in external int mode */ -- cgit v0.10.2 From 6c2c502910247d2820cb630e7b28fb6bdecdbf45 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 30 Sep 2009 11:02:59 -0500 Subject: x86: SGI UV: Fix irq affinity for hub based interrupts This patch fixes handling of uv hub irq affinity. IRQs with ALL or NODE affinity can be routed to cpus other than their originally assigned cpu. Those with CPU affinity cannot be rerouted. Signed-off-by: Dimitri Sivanich LKML-Reference: <20090930160259.GA7822@sgi.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 9613c8c..5397e12 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry { dest : 32; }; +enum { + UV_AFFINITY_ALL, + UV_AFFINITY_NODE, + UV_AFFINITY_CPU +}; + extern struct irq_chip uv_irq_chip; -extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long); +extern int +arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int); extern void arch_disable_uv_irq(int, unsigned long); +extern int uv_set_irq_affinity(unsigned int, const struct cpumask *); -extern int uv_setup_irq(char *, int, int, unsigned long); -extern void uv_teardown_irq(unsigned int, int, unsigned long); +extern int uv_irq_2_mmr_info(int, unsigned long *, int *); +extern int uv_setup_irq(char *, int, int, unsigned long, int); +extern void uv_teardown_irq(unsigned int); #endif /* _ASM_X86_UV_UV_IRQ_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8c718c9..bb52e7f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3731,9 +3731,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) * on the specified blade to allow the sending of MSIs to the specified CPU. */ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset) + unsigned long mmr_offset, int restrict) { const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3749,6 +3750,11 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, if (err != 0) return err; + if (restrict == UV_AFFINITY_CPU) + desc->status |= IRQ_NO_BALANCING; + else + desc->status |= IRQ_MOVE_PCNTXT; + spin_lock_irqsave(&vector_lock, flags); set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, irq_name); @@ -3777,11 +3783,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, * Disable the specified MMR located on the specified blade so that MSIs are * longer allowed to be sent. */ -void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) +void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) { unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -3789,9 +3794,45 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) entry = (struct uv_IO_APIC_route_entry *)&mmr_value; entry->mask = 1; - mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); } + +int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long mmr_offset; + unsigned mmr_pnode; + + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} #endif /* CONFIG_X86_64 */ int __init io_apic_get_redir_entries (int ioapic) diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529..9a83775 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -9,10 +9,22 @@ */ #include +#include #include #include #include +#include + +/* MMR offset and pnode of hub sourcing interrupts for a given irq */ +struct uv_irq_2_mmr_pnode{ + struct rb_node list; + unsigned long offset; + int pnode; + int irq; +}; +static spinlock_t uv_irq_lock; +static struct rb_root uv_irq_root; static void uv_noop(unsigned int irq) { @@ -39,25 +51,106 @@ struct irq_chip uv_irq_chip = { .unmask = uv_noop, .eoi = uv_ack_apic, .end = uv_noop, + .set_affinity = uv_set_irq_affinity, }; /* + * Add offset and pnode information of the hub sourcing interrupts to the + * rb tree for a specific irq. + */ +static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) +{ + struct rb_node **link = &uv_irq_root.rb_node; + struct rb_node *parent = NULL; + struct uv_irq_2_mmr_pnode *n; + struct uv_irq_2_mmr_pnode *e; + unsigned long irqflags; + + n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, + uv_blade_to_memory_nid(blade)); + if (!n) + return -ENOMEM; + + n->irq = irq; + n->offset = offset; + n->pnode = uv_blade_to_pnode(blade); + spin_lock_irqsave(&uv_irq_lock, irqflags); + /* Find the right place in the rbtree: */ + while (*link) { + parent = *link; + e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); + + if (unlikely(irq == e->irq)) { + /* irq entry exists */ + e->pnode = uv_blade_to_pnode(blade); + e->offset = offset; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + kfree(n); + return 0; + } + + if (irq < e->irq) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; + } + + /* Insert the node into the rbtree. */ + rb_link_node(&n->list, parent, link); + rb_insert_color(&n->list, &uv_irq_root); + + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; +} + +/* Retrieve offset and pnode information from the rb tree for a specific irq */ +int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) +{ + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + + if (e->irq == irq) { + *offset = e->offset; + *pnode = e->pnode; + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return 0; + } + + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); + return -1; +} + +/* * Set up a mapping of an available irq and vector, and enable the specified * MMR that defines the MSI that is to be sent to the specified CPU when an * interrupt is raised. */ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, - unsigned long mmr_offset) + unsigned long mmr_offset, int restrict) { - int irq; - int ret; + int irq, ret; + + irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); - irq = create_irq(); if (irq <= 0) return -EBUSY; - ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); - if (ret != irq) + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, + restrict); + if (ret == irq) + uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); + else destroy_irq(irq); return ret; @@ -71,9 +164,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); * * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). */ -void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) +void uv_teardown_irq(unsigned int irq) { - arch_disable_uv_irq(mmr_blade, mmr_offset); + struct uv_irq_2_mmr_pnode *e; + struct rb_node *n; + unsigned long irqflags; + + spin_lock_irqsave(&uv_irq_lock, irqflags); + n = uv_irq_root.rb_node; + while (n) { + e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); + if (e->irq == irq) { + arch_disable_uv_irq(e->pnode, e->offset); + rb_erase(n, &uv_irq_root); + kfree(e); + break; + } + if (irq < e->irq) + n = n->rb_left; + else + n = n->rb_right; + } + spin_unlock_irqrestore(&uv_irq_lock, irqflags); destroy_irq(irq); } EXPORT_SYMBOL_GPL(uv_teardown_irq); diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index c76677a..b5bbe59 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); #if defined CONFIG_X86_64 - mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset); + mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, + UV_AFFINITY_CPU); if (mq->irq < 0) { dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", -mq->irq); @@ -136,7 +137,7 @@ static void xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) { #if defined CONFIG_X86_64 - uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset); + uv_teardown_irq(mq->irq); #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV int mmr_pnode; -- cgit v0.10.2 From 9338ad6ffb70eca97f335d93c54943828c8b209e Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 13 Oct 2009 15:32:36 -0500 Subject: x86, apic: Move SGI UV functionality out of generic IO-APIC code Move UV specific functionality out of the generic IO-APIC code. Signed-off-by: Dimitri Sivanich LKML-Reference: <20091013203236.GD20543@sgi.com> [ Cleaned up the code some more in their new places. ] Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index ba180d9..56f0877 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -79,14 +79,31 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, int ioapic, int ioapic_pin, int trigger, int polarity) { - irq_attr->ioapic = ioapic; - irq_attr->ioapic_pin = ioapic_pin; - irq_attr->trigger = trigger; - irq_attr->polarity = polarity; + irq_attr->ioapic = ioapic; + irq_attr->ioapic_pin = ioapic_pin; + irq_attr->trigger = trigger; + irq_attr->polarity = polarity; } -extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, - struct io_apic_irq_attr *irq_attr); +/* + * This is performance-critical, we want to do it O(1) + * + * Most irqs are mapped 1:1 with pins. + */ +struct irq_cfg { + struct irq_pin_list *irq_2_pin; + cpumask_var_t domain; + cpumask_var_t old_domain; + unsigned move_cleanup_count; + u8 vector; + u8 move_in_progress : 1; +}; + +extern struct irq_cfg *irq_cfg(unsigned int); +extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); +extern void send_cleanup_vector(struct irq_cfg *); +extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); +extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); extern void enable_IO_APIC(void); diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h index 5397e12..d6b17c7 100644 --- a/arch/x86/include/asm/uv/uv_irq.h +++ b/arch/x86/include/asm/uv/uv_irq.h @@ -31,13 +31,6 @@ enum { UV_AFFINITY_CPU }; -extern struct irq_chip uv_irq_chip; - -extern int -arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int); -extern void arch_disable_uv_irq(int, unsigned long); -extern int uv_set_irq_affinity(unsigned int, const struct cpumask *); - extern int uv_irq_2_mmr_info(int, unsigned long *, int *); extern int uv_setup_irq(char *, int, int, unsigned long, int); extern void uv_teardown_irq(unsigned int); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bb52e7f..ce16b65 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -60,8 +60,6 @@ #include #include #include -#include -#include #include @@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. - */ -struct irq_cfg { - struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[] = { @@ -209,7 +193,7 @@ int __init arch_early_irq_init(void) } #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { struct irq_cfg *cfg = NULL; struct irq_desc *desc; @@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) /* end for move_irq_desc */ #else -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { return irq < nr_irqs ? irq_cfgx + irq : NULL; } @@ -1237,8 +1221,7 @@ next: return err; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -2245,7 +2228,7 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) +void send_cleanup_vector(struct irq_cfg *cfg) { cpumask_var_t cleanup_mask; @@ -2289,15 +2272,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - /* * Either sets desc->affinity to a valid value, and returns * ->cpu_mask_to_apicid of that, or returns BAD_APICID and * leaves desc->affinity untouched. */ -static unsigned int +unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; @@ -3725,116 +3705,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_UV -/* - * Re-target the irq to the specified CPU and enable the specified MMR located - * on the specified blade to allow the sending of MSIs to the specified CPU. - */ -int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset, int restrict) -{ - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg; - int mmr_pnode; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long flags; - int err; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - cfg = irq_cfg(irq); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - if (restrict == UV_AFFINITY_CPU) - desc->status |= IRQ_NO_BALANCING; - else - desc->status |= IRQ_MOVE_PCNTXT; - - spin_lock_irqsave(&vector_lock, flags); - set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - spin_unlock_irqrestore(&vector_lock, flags); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; -} - -/* - * Disable the specified MMR located on the specified blade so that MSIs are - * longer allowed to be sent. - */ -void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) -{ - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->mask = 1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); -} - -int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg = desc->chip_data; - unsigned int dest; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long mmr_offset; - unsigned mmr_pnode; - - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) - return -1; - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - /* Get previously stored MMR and pnode of hub sourcing interrupts */ - if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) - return -1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return 0; -} -#endif /* CONFIG_X86_64 */ - int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 9a83775..61d805d 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -18,13 +18,16 @@ /* MMR offset and pnode of hub sourcing interrupts for a given irq */ struct uv_irq_2_mmr_pnode{ - struct rb_node list; - unsigned long offset; - int pnode; - int irq; + struct rb_node list; + unsigned long offset; + int pnode; + int irq; }; -static spinlock_t uv_irq_lock; -static struct rb_root uv_irq_root; + +static spinlock_t uv_irq_lock; +static struct rb_root uv_irq_root; + +static int uv_set_irq_affinity(unsigned int, const struct cpumask *); static void uv_noop(unsigned int irq) { @@ -132,6 +135,114 @@ int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) } /* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +static int +arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset, int restrict) +{ + const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int err; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, eligible_cpu); + if (err != 0) + return err; + + if (restrict == UV_AFFINITY_CPU) + desc->status |= IRQ_NO_BALANCING; + else + desc->status |= IRQ_MOVE_PCNTXT; + + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + + BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != + sizeof(unsigned long)); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + entry->mask = 1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} + +static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long mmr_offset; + unsigned mmr_pnode; + + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} + +/* * Set up a mapping of an available irq and vector, and enable the specified * MMR that defines the MSI that is to be sent to the specified CPU when an * interrupt is raised. -- cgit v0.10.2 From c44fc770845163f8d9e573f37f92a7b7a7ade14e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 19 Sep 2009 06:50:42 +0200 Subject: tracing: Move syscalls metadata handling from arch to core Most of the syscalls metadata processing is done from arch. But these operations are mostly generic accross archs. Especially now that we have a common variable name that expresses the number of syscalls supported by an arch: NR_syscalls, the only remaining bits that need to reside in arch is the syscall nr to addr translation. v2: Compare syscalls symbols only after the "sys" prefix so that we avoid spurious mismatches with archs that have syscalls wrappers, in which case syscalls symbols have "SyS" prefixed aliases. (Reported by: Heiko Carstens) Signed-off-by: Frederic Weisbecker Acked-by: Heiko Carstens Cc: Ingo Molnar Cc: Steven Rostedt Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron Cc: Lai Jiangshan Cc: Martin Schwidefsky Cc: Paul Mundt diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 57bdcb1..7c5752c 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -206,73 +206,10 @@ out: #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned int sys_call_table[]; -static struct syscall_metadata **syscalls_metadata; - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - for (i = 0; i < NR_syscalls; i++) - if (syscalls_metadata[i]) - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static struct syscall_metadata *find_syscall_meta(unsigned long syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup(syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name + 3, str + 3)) - return start; - } - return NULL; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - struct syscall_metadata *meta; - int i; - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * NR_syscalls, - GFP_KERNEL); - if (!syscalls_metadata) - return -ENOMEM; - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta((unsigned long)sys_call_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)sys_call_table[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 25e6f5f..5a1b975 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -470,82 +470,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, #ifdef CONFIG_FTRACE_SYSCALLS -extern unsigned long __start_syscalls_metadata[]; -extern unsigned long __stop_syscalls_metadata[]; extern unsigned long *sys_call_table; -static struct syscall_metadata **syscalls_metadata; - -static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) -{ - struct syscall_metadata *start; - struct syscall_metadata *stop; - char str[KSYM_SYMBOL_LEN]; - - - start = (struct syscall_metadata *)__start_syscalls_metadata; - stop = (struct syscall_metadata *)__stop_syscalls_metadata; - kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); - - for ( ; start < stop; start++) { - if (start->name && !strcmp(start->name, str)) - return start; - } - return NULL; -} - -struct syscall_metadata *syscall_nr_to_meta(int nr) -{ - if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) - return NULL; - - return syscalls_metadata[nr]; -} - -int syscall_name_to_nr(char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - - for (i = 0; i < NR_syscalls; i++) { - if (syscalls_metadata[i]) { - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - } - } - return -1; -} - -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) +unsigned long __init arch_syscall_addr(int nr) { - syscalls_metadata[num]->exit_id = id; -} - -static int __init arch_init_ftrace_syscalls(void) -{ - int i; - struct syscall_metadata *meta; - unsigned long **psys_syscall_table = &sys_call_table; - - syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * - NR_syscalls, GFP_KERNEL); - if (!syscalls_metadata) { - WARN_ON(1); - return -ENOMEM; - } - - for (i = 0; i < NR_syscalls; i++) { - meta = find_syscall_meta(psys_syscall_table[i]); - syscalls_metadata[i] = meta; - } - return 0; + return (unsigned long)(&sys_call_table)[nr]; } -arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dc283b..e972f0a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -33,7 +33,7 @@ struct syscall_metadata { }; #ifdef CONFIG_FTRACE_SYSCALLS -extern struct syscall_metadata *syscall_nr_to_meta(int nr); +extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9fbce6c..8bda4bf 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -14,6 +14,69 @@ static int sys_refcount_exit; static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls); static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls); +extern unsigned long __start_syscalls_metadata[]; +extern unsigned long __stop_syscalls_metadata[]; + +static struct syscall_metadata **syscalls_metadata; + +static struct syscall_metadata *find_syscall_meta(unsigned long syscall) +{ + struct syscall_metadata *start; + struct syscall_metadata *stop; + char str[KSYM_SYMBOL_LEN]; + + + start = (struct syscall_metadata *)__start_syscalls_metadata; + stop = (struct syscall_metadata *)__stop_syscalls_metadata; + kallsyms_lookup(syscall, NULL, NULL, NULL, str); + + for ( ; start < stop; start++) { + /* + * Only compare after the "sys" prefix. Archs that use + * syscall wrappers may have syscalls symbols aliases prefixed + * with "SyS" instead of "sys", leading to an unwanted + * mismatch. + */ + if (start->name && !strcmp(start->name + 3, str + 3)) + return start; + } + return NULL; +} + +static struct syscall_metadata *syscall_nr_to_meta(int nr) +{ + if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) + return NULL; + + return syscalls_metadata[nr]; +} + +int syscall_name_to_nr(char *name) +{ + int i; + + if (!syscalls_metadata) + return -1; + + for (i = 0; i < NR_syscalls; i++) { + if (syscalls_metadata[i]) { + if (!strcmp(syscalls_metadata[i]->name, name)) + return i; + } + } + return -1; +} + +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -375,6 +438,29 @@ struct trace_event event_syscall_exit = { .trace = print_syscall_exit, }; +int __init init_ftrace_syscalls(void) +{ + struct syscall_metadata *meta; + unsigned long addr; + int i; + + syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * + NR_syscalls, GFP_KERNEL); + if (!syscalls_metadata) { + WARN_ON(1); + return -ENOMEM; + } + + for (i = 0; i < NR_syscalls; i++) { + addr = arch_syscall_addr(i); + meta = find_syscall_meta(addr); + syscalls_metadata[i] = meta; + } + + return 0; +} +core_initcall(init_ftrace_syscalls); + #ifdef CONFIG_EVENT_PROFILE static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); -- cgit v0.10.2 From 459c6d15a0c52bae43842ff2cd0dd41aa7de9b7f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 19 Sep 2009 07:14:15 +0200 Subject: tracing: Document HAVE_SYSCALL_TRACEPOINTS needs Document the arch needed requirements to get the support for syscalls tracing. v2: HAVE_FTRACE_SYSCALLS have been changed to HAVE_SYSCALL_TRACEPOINTS recently. Update this config name in the documentation then. Signed-off-by: Frederic Weisbecker Acked-by: Heiko Carstens Cc: Ingo Molnar Cc: Steven Rostedt Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron Cc: Lai Jiangshan Cc: Martin Schwidefsky Cc: Paul Mundt diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index 7003e10..641a1ef 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt @@ -213,10 +213,19 @@ If you can't trace NMI functions, then skip this option.
-HAVE_FTRACE_SYSCALLS +HAVE_SYSCALL_TRACEPOINTS --------------------- -
+You need very few things to get the syscalls tracing in an arch. + +- Have a NR_syscalls variable in that provides the number + of syscalls supported by the arch. +- Implement arch_syscall_addr() that resolves a syscall address from a + syscall number. +- Support the TIF_SYSCALL_TRACEPOINT thread flags +- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace + in the ptrace syscalls tracing path. +- Tag this arch as HAVE_SYSCALL_TRACEPOINTS. HAVE_FTRACE_MCOUNT_RECORD -- cgit v0.10.2 From 7ec13187ef48b04bb7f6dfa266c7271a52d009c2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 14 Oct 2009 15:06:42 +0200 Subject: x86, apic: Fix prototype in hw_irq.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This warning: In file included from arch/x86/include/asm/ipi.h:23, from arch/x86/kernel/apic/apic_noop.c:27: arch/x86/include/asm/hw_irq.h:105: warning: ‘struct irq_desc’ declared inside parameter list arch/x86/include/asm/hw_irq.h:105: warning: its scope is only this definition or declaration, which is probably not what you want triggers because irq_desc is defined after hw_irq.h is included in irq.h. Since it's pointer reference only, a forward declaration of the type will solve the problem. LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 56f0877..1984ce9 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -102,6 +102,8 @@ struct irq_cfg { extern struct irq_cfg *irq_cfg(unsigned int); extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *); extern void send_cleanup_vector(struct irq_cfg *); + +struct irq_desc; extern unsigned int set_desc_affinity(struct irq_desc *, const struct cpumask *); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr); extern void setup_ioapic_dest(void); -- cgit v0.10.2 From ac06ea2cd06291e63951b51dd7c9a23e6a1f2683 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 10 Oct 2009 09:35:48 +0200 Subject: x86: Remove BKL from microcode cycle_lock_kernel() in microcode_open() is a worthless exercise as there is nothing to wait for. Remove it. Signed-off-by: Thomas Gleixner LKML-Reference: <20091010153349.196074920@linutronix.de> diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8..2bcad39 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -73,7 +73,6 @@ #include #include #include -#include #include #include #include @@ -201,7 +200,6 @@ static int do_microcode_update(const void __user *buf, size_t size) static int microcode_open(struct inode *unused1, struct file *unused2) { - cycle_kernel_lock(); return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; } -- cgit v0.10.2 From 05d86412eab6a18cf57697474cc4f8fbfcd6936f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 9 Oct 2009 19:02:20 +0200 Subject: x86: Remove BKL from apm_32 The lock/unlock kernel pair in do_open() got there with the BKL push down and protects nothing. Remove it. Replace the lock/unlock kernel in the ioctl code with a mutex to protect standbys_pending and suspends_pending. Signed-off-by: Thomas Gleixner LKML-Reference: <20091010153349.365236337@linutronix.de> diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace6..b5b6b23 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -204,7 +204,6 @@ #include #include -#include #include #include #include @@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); static struct apm_user *user_list; static DEFINE_SPINLOCK(user_list_lock); +static DEFINE_MUTEX(apm_mutex); /* * Set up a segment that references the real mode segment 0x40 @@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) return -EPERM; switch (cmd) { case APM_IOC_STANDBY: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->standbys_read > 0) { as->standbys_read--; as->standbys_pending--; @@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_STANDBY, as); if (standbys_pending <= 0) standby(); - unlock_kernel(); + mutex_unlock(&apm_mutex); break; case APM_IOC_SUSPEND: - lock_kernel(); + mutex_lock(&apm_mutex); if (as->suspends_read > 0) { as->suspends_read--; as->suspends_pending--; @@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) queue_event(APM_USER_SUSPEND, as); if (suspends_pending <= 0) { ret = suspend(1); + mutex_unlock(&apm_mutex); } else { as->suspend_wait = 1; + mutex_unlock(&apm_mutex); wait_event_interruptible(apm_suspend_waitqueue, as->suspend_wait == 0); ret = as->suspend_result; } - unlock_kernel(); return ret; default: return -ENOTTY; @@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp) { struct apm_user *as; - lock_kernel(); as = kmalloc(sizeof(*as), GFP_KERNEL); if (as == NULL) { printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", sizeof(*as)); - unlock_kernel(); return -ENOMEM; } as->magic = APM_BIOS_MAGIC; @@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp) user_list = as; spin_unlock(&user_list_lock); filp->private_data = as; - unlock_kernel(); return 0; } -- cgit v0.10.2 From 9636bc0555e3f383c120ddcffe4b7c5c58a10b1a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 19:09:04 +0400 Subject: x86, apic: Explain show_lapic= in kernel parameters list Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091014150904.GA5259@lenovo> Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9107b38..465a786 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -345,6 +345,15 @@ and is between 256 and 4096 characters. It is defined in the file Change the amount of debugging information output when initialising the APIC and IO-APIC components. + show_lapic= [APIC,X86] Advanced Programmable Interrupt Controller + Limit apic dumping. The parameter defines the maximal + number of local apics being dumped. Also it is possible + to set it to "all" by meaning -- no limit here. + Format: { 1 (default) | 2 | ... | all }. + The parameter valid if only apic=debug or + apic=verbose is specified. + Example: apic=debug show_lapic=all + apm= [APM] Advanced Power Management See header of arch/x86/kernel/apm_32.c. -- cgit v0.10.2 From 06f43d66ec36388056f5c697bf1e67c0e0a1645c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Oct 2009 20:43:39 +0200 Subject: ftrace: Copy ftrace_graph_filter boot param using strlcpy We are using strncpy in the wrong way to copy the ftrace_graph_filter boot param because we pass the buffer size instead of the max string size it can contain (buffer size - 1). The end result might not be NULL terminated as we are abusing the max string size. Lets use strlcpy() instead. Reported-by: Li Zefan Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index aaea9cd..b10c0d9 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2293,7 +2293,7 @@ __setup("ftrace_filter=", set_ftrace_filter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER static int __init set_graph_function(char *str) { - strncpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); + strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); return 1; } __setup("ftrace_graph_filter=", set_graph_function); -- cgit v0.10.2 From 1beee96bae0daf7f491356777c3080cc436950f5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 14 Oct 2009 20:50:32 +0200 Subject: ftrace: Rename set_bootup_ftrace into set_cmdline_ftrace set_cmdline_ftrace is a better match against what does this function: apply a tracer name from the kernel command line. Reported-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Cc: Li Zefan diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4311ec3..026e715 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -129,7 +129,7 @@ static int tracing_set_tracer(const char *buf); static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; static char *default_bootup_tracer; -static int __init set_bootup_ftrace(char *str) +static int __init set_cmdline_ftrace(char *str) { strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); default_bootup_tracer = bootup_tracer_buf; @@ -137,7 +137,7 @@ static int __init set_bootup_ftrace(char *str) ring_buffer_expanded = 1; return 1; } -__setup("ftrace=", set_bootup_ftrace); +__setup("ftrace=", set_cmdline_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { -- cgit v0.10.2 From 924a79af2cdee26a034b9bdce8c9c76995b5c901 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:32 -0400 Subject: perf tools: Handle print concatenations in event format file kmem_alloc ftrace event format had a string that was broken up by two tokens. "string 1" "string 2". This patch lets the parser be able to handle the concatenation. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194357.253818714@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index eef60df..a05c714 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1734,6 +1734,7 @@ static int event_read_print(struct event *event) if (read_expect_type(EVENT_DQUOTE, &token) < 0) goto fail; + concat: event->print_fmt.format = token; event->print_fmt.args = NULL; @@ -1743,6 +1744,21 @@ static int event_read_print(struct event *event) if (type == EVENT_NONE) return 0; + /* Handle concatination of print lines */ + if (type == EVENT_DQUOTE) { + char *cat; + + cat = malloc_or_die(strlen(event->print_fmt.format) + + strlen(token) + 1); + strcpy(cat, event->print_fmt.format); + strcat(cat, token); + free_token(token); + free_token(event->print_fmt.format); + event->print_fmt.format = NULL; + token = cat; + goto concat; + } + if (test_type_token(type, token, EVENT_DELIM, (char *)",")) goto fail; -- cgit v0.10.2 From 91ff2bc191827f0d3f5ad0a433ff7df7d2dd9aee Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:33 -0400 Subject: perf tools: Fix backslash processing on trace print formats The handling of backslashes was broken. It would stop parsing when encountering one. Also, '\n', '\t', '\r' and '\\' were not converted. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194357.521974680@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index a05c714..2b75ec2 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -522,7 +522,10 @@ static enum event_type __read_token(char **tok) last_ch = ch; ch = __read_char(); buf[i++] = ch; - } while (ch != quote_ch && last_ch != '\\'); + /* the '\' '\' will cancel itself */ + if (ch == '\\' && last_ch == '\\') + last_ch = 0; + } while (ch != quote_ch || last_ch == '\\'); /* remove the last quote */ i--; goto out; @@ -2325,7 +2328,27 @@ static void pretty_print(void *data, int size, struct event *event) for (; *ptr; ptr++) { ls = 0; - if (*ptr == '%') { + if (*ptr == '\\') { + ptr++; + switch (*ptr) { + case 'n': + printf("\n"); + break; + case 't': + printf("\t"); + break; + case 'r': + printf("\r"); + break; + case '\\': + printf("\\"); + break; + default: + printf("%c", *ptr); + break; + } + + } else if (*ptr == '%') { saveptr = ptr; show_func = 0; cont_process: -- cgit v0.10.2 From 298ebc3ef2a6c569b3eb51651f04e26aecbf8a1d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:34 -0400 Subject: perf tools: Handle trace parsing of < and > The code to handle the '<' and '>' ops was all in place, but they were not in the switch statement to consider them as valid ops. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194357.807434040@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 2b75ec2..3e643f5 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1170,6 +1170,8 @@ process_op(struct event *event, struct print_arg *arg, char **tok) strcmp(token, "*") == 0 || strcmp(token, "^") == 0 || strcmp(token, "/") == 0 || + strcmp(token, "<") == 0 || + strcmp(token, ">") == 0 || strcmp(token, "==") == 0 || strcmp(token, "!=") == 0) { -- cgit v0.10.2 From 0959b8d65ce26131c2d5ccfa518a7b76529280fa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:35 -0400 Subject: perf tools: Handle arrays in print fields for trace parsing The array used by the ftrace stack events (caller[x]) causes issues with the parser. This adds code to handle the case, but it also assumes that the array is of type long. Note, this is a special case used (currently) only by the ftrace user and kernel stack records. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194358.124833639@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 3e643f5..7aeedb0 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1046,6 +1046,35 @@ out_free: return EVENT_ERROR; } +static enum event_type +process_array(struct event *event, struct print_arg *top, char **tok) +{ + struct print_arg *arg; + enum event_type type; + char *token = NULL; + + arg = malloc_or_die(sizeof(*arg)); + memset(arg, 0, sizeof(*arg)); + + *tok = NULL; + type = process_arg(event, arg, &token); + if (test_type_token(type, token, EVENT_OP, (char *)"]")) + goto out_free; + + top->op.right = arg; + + free_token(token); + type = read_token_item(&token); + *tok = token; + + return type; + +out_free: + free_token(*tok); + free_arg(arg); + return EVENT_ERROR; +} + static int get_op_prio(char *op) { if (!op[1]) { @@ -1192,6 +1221,18 @@ process_op(struct event *event, struct print_arg *arg, char **tok) arg->op.right = right; + } else if (strcmp(token, "[") == 0) { + + left = malloc_or_die(sizeof(*left)); + *left = *arg; + + arg->type = PRINT_OP; + arg->op.op = token; + arg->op.left = left; + + arg->op.prio = 0; + type = process_array(event, arg, tok); + } else { die("unknown op '%s'", token); /* the arg is now the left side */ @@ -1931,6 +1972,7 @@ static unsigned long long eval_num_arg(void *data, int size, { unsigned long long val = 0; unsigned long long left, right; + struct print_arg *larg; switch (arg->type) { case PRINT_NULL: @@ -1957,6 +1999,26 @@ static unsigned long long eval_num_arg(void *data, int size, return 0; break; case PRINT_OP: + if (strcmp(arg->op.op, "[") == 0) { + /* + * Arrays are special, since we don't want + * to read the arg as is. + */ + if (arg->op.left->type != PRINT_FIELD) + goto default_op; /* oops, all bets off */ + larg = arg->op.left; + if (!larg->field.field) { + larg->field.field = + find_any_field(event, larg->field.name); + if (!larg->field.field) + die("field %s not found", larg->field.name); + } + right = eval_num_arg(data, size, event, arg->op.right); + val = read_size(data + larg->field.field->offset + + right * long_size, long_size); + break; + } + default_op: left = eval_num_arg(data, size, event, arg->op.left); right = eval_num_arg(data, size, event, arg->op.right); switch (arg->op.op[0]) { -- cgit v0.10.2 From b99af874829cba2b30d212bc6fd31b56275ee4d2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:36 -0400 Subject: perf tools: Handle * as typecast in trace parsing The '*' is currently only treated as a multiplication, and it needs to be handled as a typecast pointer. This is the version used by trace-cmd. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194358.409327875@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 7aeedb0..f73ee55 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1217,7 +1217,24 @@ process_op(struct event *event, struct print_arg *arg, char **tok) right = malloc_or_die(sizeof(*right)); - type = process_arg(event, right, tok); + type = read_token_item(&token); + *tok = token; + + /* could just be a type pointer */ + if ((strcmp(arg->op.op, "*") == 0) && + type == EVENT_DELIM && (strcmp(token, ")") == 0)) { + if (left->type != PRINT_ATOM) + die("bad pointer type"); + left->atom.atom = realloc(left->atom.atom, + sizeof(left->atom.atom) + 3); + strcat(left->atom.atom, " *"); + *arg = *left; + free(arg); + + return type; + } + + type = process_arg_token(event, right, tok, type); arg->op.right = right; @@ -1548,7 +1565,6 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) { struct print_arg *item_arg; enum event_type type; - int ptr_cast = 0; char *token; type = process_arg(event, arg, &token); @@ -1556,26 +1572,11 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) if (type == EVENT_ERROR) return EVENT_ERROR; - if (type == EVENT_OP) { - /* handle the ptr casts */ - if (!strcmp(token, "*")) { - /* - * FIXME: should we zapp whitespaces before ')' ? - * (may require a peek_token_item()) - */ - if (__peek_char() == ')') { - ptr_cast = 1; - free_token(token); - type = read_token_item(&token); - } - } - if (!ptr_cast) { - type = process_op(event, arg, &token); + if (type == EVENT_OP) + type = process_op(event, arg, &token); - if (type == EVENT_ERROR) - return EVENT_ERROR; - } - } + if (type == EVENT_ERROR) + return EVENT_ERROR; if (test_type_token(type, token, EVENT_DELIM, (char *)")")) { free_token(token); @@ -1601,13 +1602,6 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) item_arg = malloc_or_die(sizeof(*item_arg)); arg->type = PRINT_TYPE; - if (ptr_cast) { - char *old = arg->atom.atom; - - arg->atom.atom = malloc_or_die(strlen(old + 3)); - sprintf(arg->atom.atom, "%s *", old); - free(old); - } arg->typecast.type = arg->atom.atom; arg->typecast.item = item_arg; type = process_arg_token(event, item_arg, &token, type); -- cgit v0.10.2 From f1d1feecf07261d083859ecfef0d4399036f9683 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:37 -0400 Subject: perf tools: Handle newlines in trace parsing better New lines between args in the trace format can break the parsing. This should not be the case. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194358.637991808@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index f73ee55..59e4e4d 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1716,12 +1716,18 @@ process_arg_token(struct event *event, struct print_arg *arg, static int event_read_print_args(struct event *event, struct print_arg **list) { - enum event_type type; + enum event_type type = EVENT_ERROR; struct print_arg *arg; char *token; int args = 0; do { + if (type == EVENT_NEWLINE) { + free_token(token); + type = read_token_item(&token); + continue; + } + arg = malloc_or_die(sizeof(*arg)); memset(arg, 0, sizeof(*arg)); -- cgit v0.10.2 From 13999e59343b042b0807be2df6ae5895d29782a0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:38 -0400 Subject: perf tools: Handle the case with and without the "signed" trace field The trace format files now have a "signed" field. But we should still be able to handle the kernels that do not have this field. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194358.888239553@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 59e4e4d..0739b12 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -924,23 +924,30 @@ static int event_read_fields(struct event *event, struct format_field **fields) if (read_expected(EVENT_OP, (char *)";") < 0) goto fail_expect; - if (read_expected(EVENT_ITEM, (char *)"signed") < 0) - goto fail_expect; + type = read_token(&token); + if (type != EVENT_NEWLINE) { + /* newer versions of the kernel have a "signed" type */ + if (test_type_token(type, token, EVENT_ITEM, (char *)"signed")) + goto fail; - if (read_expected(EVENT_OP, (char *)":") < 0) - goto fail_expect; + free_token(token); - if (read_expect_type(EVENT_ITEM, &token)) - goto fail; - if (strtoul(token, NULL, 0)) - field->flags |= FIELD_IS_SIGNED; - free_token(token); + if (read_expected(EVENT_OP, (char *)":") < 0) + goto fail_expect; - if (read_expected(EVENT_OP, (char *)";") < 0) - goto fail_expect; + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + + /* add signed type */ + + free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + goto fail_expect; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + } - if (read_expect_type(EVENT_NEWLINE, &token) < 0) - goto fail; free_token(token); *fields = field; @@ -2949,21 +2956,23 @@ static void print_args(struct print_arg *args) } } -static void parse_header_field(char *type, +static void parse_header_field(char *field, int *offset, int *size) { char *token; + int type; if (read_expected(EVENT_ITEM, (char *)"field") < 0) return; if (read_expected(EVENT_OP, (char *)":") < 0) return; + /* type */ if (read_expect_type(EVENT_ITEM, &token) < 0) - return; + goto fail; free_token(token); - if (read_expected(EVENT_ITEM, type) < 0) + if (read_expected(EVENT_ITEM, field) < 0) return; if (read_expected(EVENT_OP, (char *)";") < 0) return; @@ -2972,7 +2981,7 @@ static void parse_header_field(char *type, if (read_expected(EVENT_OP, (char *)":") < 0) return; if (read_expect_type(EVENT_ITEM, &token) < 0) - return; + goto fail; *offset = atoi(token); free_token(token); if (read_expected(EVENT_OP, (char *)";") < 0) @@ -2982,22 +2991,36 @@ static void parse_header_field(char *type, if (read_expected(EVENT_OP, (char *)":") < 0) return; if (read_expect_type(EVENT_ITEM, &token) < 0) - return; + goto fail; *size = atoi(token); free_token(token); if (read_expected(EVENT_OP, (char *)";") < 0) return; - if (read_expected(EVENT_ITEM, (char *)"signed") < 0) - return; - if (read_expected(EVENT_OP, (char *)":") < 0) - return; - if (read_expect_type(EVENT_ITEM, &token) < 0) - return; - free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) - return; - if (read_expect_type(EVENT_NEWLINE, &token) < 0) - return; + type = read_token(&token); + if (type != EVENT_NEWLINE) { + /* newer versions of the kernel have a "signed" type */ + if (type != EVENT_ITEM) + goto fail; + + if (strcmp(token, (char *)"signed") != 0) + goto fail; + + free_token(token); + + if (read_expected(EVENT_OP, (char *)":") < 0) + return; + + if (read_expect_type(EVENT_ITEM, &token)) + goto fail; + + free_token(token); + if (read_expected(EVENT_OP, (char *)";") < 0) + return; + + if (read_expect_type(EVENT_NEWLINE, &token)) + goto fail; + } + fail: free_token(token); } -- cgit v0.10.2 From 07a4bdddcf2546ccfbfb3c782deab636c371edeb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:39 -0400 Subject: perf tools: Still continue on failed parsing of an event Even though an event may fail to parse, we should not kill the entire report. The trace should still be able to show what it can. If an event fails to parse, a warning is printed, and the output continues. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194359.190809589@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 0739b12..eda0a24 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -613,7 +613,7 @@ static enum event_type read_token_item(char **tok) static int test_type(enum event_type type, enum event_type expect) { if (type != expect) { - die("Error: expected type %d but read %d", + warning("Error: expected type %d but read %d", expect, type); return -1; } @@ -624,13 +624,13 @@ static int test_type_token(enum event_type type, char *token, enum event_type expect, const char *expect_tok) { if (type != expect) { - die("Error: expected type %d but read %d", + warning("Error: expected type %d but read %d", expect, type); return -1; } if (strcmp(token, expect_tok) != 0) { - die("Error: expected '%s' but read '%s'", + warning("Error: expected '%s' but read '%s'", expect_tok, token); return -1; } @@ -668,7 +668,7 @@ static int __read_expected(enum event_type expect, const char *str, int newline_ free_token(token); - return 0; + return ret; } static int read_expected(enum event_type expect, const char *str) @@ -1258,12 +1258,12 @@ process_op(struct event *event, struct print_arg *arg, char **tok) type = process_array(event, arg, tok); } else { - die("unknown op '%s'", token); + warning("unknown op '%s'", token); + event->flags |= EVENT_FL_FAILED; /* the arg is now the left side */ return EVENT_NONE; } - if (type == EVENT_OP) { int prio; @@ -2873,7 +2873,7 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, event = trace_find_event(type); if (!event) { - printf("ug! no event found for type %d\n", type); + warning("ug! no event found for type %d", type); return; } @@ -2887,6 +2887,12 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, comm, pid, cpu, secs, nsecs, event->name); + if (event->flags & EVENT_FL_FAILED) { + printf("EVENT '%s' FAILED TO PARSE\n", + event->name); + return; + } + pretty_print(data, size, event); printf("\n"); } @@ -3120,12 +3126,16 @@ int parse_event_file(char *buf, unsigned long size, char *sys) die("failed to read event id"); ret = event_read_format(event); - if (ret < 0) - die("failed to read event format"); + if (ret < 0) { + warning("failed to read event format for %s", event->name); + goto event_failed; + } ret = event_read_print(event); - if (ret < 0) - die("failed to read event print fmt"); + if (ret < 0) { + warning("failed to read event print fmt for %s", event->name); + goto event_failed; + } event->system = strdup(sys); @@ -3135,6 +3145,12 @@ int parse_event_file(char *buf, unsigned long size, char *sys) add_event(event); return 0; + + event_failed: + event->flags |= EVENT_FL_FAILED; + /* still add it even if it failed */ + add_event(event); + return -1; } void parse_set_info(int nr_cpus, int long_sz) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index da77e07..29821ac 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -139,12 +139,14 @@ struct event { }; enum { - EVENT_FL_ISFTRACE = 1, - EVENT_FL_ISPRINT = 2, - EVENT_FL_ISBPRINT = 4, - EVENT_FL_ISFUNC = 8, - EVENT_FL_ISFUNCENT = 16, - EVENT_FL_ISFUNCRET = 32, + EVENT_FL_ISFTRACE = 0x01, + EVENT_FL_ISPRINT = 0x02, + EVENT_FL_ISBPRINT = 0x04, + EVENT_FL_ISFUNC = 0x08, + EVENT_FL_ISFUNCENT = 0x10, + EVENT_FL_ISFUNCRET = 0x20, + + EVENT_FL_FAILED = 0x80000000 }; struct record { -- cgit v0.10.2 From ffa1895561645103d8f8059b35d9c06e6eeead2e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:40 -0400 Subject: perf tools: Fix bprintk reading in trace output The bprintk parsing was broken in more ways than one. The file parsing was incorrect, and the words used by the arguments are always 4 bytes aligned, even on 64-bit machines. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194359.520931637@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index eda0a24..93a82fe 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -284,18 +284,16 @@ void parse_ftrace_printk(char *file, unsigned int size __unused) char *line; char *next = NULL; char *addr_str; - int ret; + char *fmt; int i; line = strtok_r(file, "\n", &next); while (line) { item = malloc_or_die(sizeof(*item)); - ret = sscanf(line, "%as : %as", - (float *)(void *)&addr_str, /* workaround gcc warning */ - (float *)(void *)&item->printk); + addr_str = strtok_r(line, ":", &fmt); item->addr = strtoull(addr_str, NULL, 16); - free(addr_str); - + /* fmt still has a space, skip it */ + item->printk = strdup(fmt+1); item->next = list; list = item; line = strtok_r(NULL, "\n", &next); @@ -2274,8 +2272,9 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc case 'u': case 'x': case 'i': - bptr = (void *)(((unsigned long)bptr + (long_size - 1)) & - ~(long_size - 1)); + /* the pointers are always 4 bytes aligned */ + bptr = (void *)(((unsigned long)bptr + 3) & + ~3); switch (ls) { case 0: case 1: -- cgit v0.10.2 From 0d1da915c76838c9ee7af7cdefbcb2bae9424161 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:41 -0400 Subject: perf tools: Handle both versions of ftrace output The ftrace output events can have either arguments or no arguments. The parser needs to be able to handle both. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194359.790221427@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 93a82fe..c174765 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1819,7 +1819,7 @@ static int event_read_print(struct event *event) if (ret < 0) return -1; - return 0; + return ret; fail: free_token(token); @@ -3088,6 +3088,9 @@ int parse_ftrace_file(char *buf, unsigned long size) if (ret < 0) die("failed to read ftrace event print fmt"); + /* New ftrace handles args */ + if (ret > 0) + return 0; /* * The arguments for ftrace files are parsed by the fields. * Set up the fields as their arguments. -- cgit v0.10.2 From cda48461c7fb8431a99b7960480f5f42cc1a5324 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:42 -0400 Subject: perf tools: Add latency format to trace output Add the irqs disabled, preemption count, need resched, and other info that is shown in the latency format of ftrace. # perf trace -l perf-16457 2..s2. 53636.260344: kmem_cache_free: call_site=ffffffff811198f perf-16457 2..s2. 53636.264330: kmem_cache_free: call_site=ffffffff811198f perf-16457 2d.s4. 53636.300006: kmem_cache_free: call_site=ffffffff810d889 Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194400.076588953@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ccf867d..ce8459a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -144,6 +144,8 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('l', "latency", &latency_format, + "show latency attributes (irqs/preemption disabled, etc)"), OPT_END() }; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c174765..fde1a43 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -40,6 +40,8 @@ int header_page_size_size; int header_page_data_offset; int header_page_data_size; +int latency_format; + static char *input_buf; static unsigned long long input_buf_ptr; static unsigned long long input_buf_siz; @@ -1928,37 +1930,67 @@ static int get_common_info(const char *type, int *offset, int *size) return 0; } -int trace_parse_common_type(void *data) +static int __parse_common(void *data, int *size, int *offset, + char *name) { - static int type_offset; - static int type_size; int ret; - if (!type_size) { - ret = get_common_info("common_type", - &type_offset, - &type_size); + if (!*size) { + ret = get_common_info(name, offset, size); if (ret < 0) return ret; } - return read_size(data + type_offset, type_size); + return read_size(data + *offset, *size); +} + +int trace_parse_common_type(void *data) +{ + static int type_offset; + static int type_size; + + return __parse_common(data, &type_size, &type_offset, + (char *)"common_type"); } static int parse_common_pid(void *data) { static int pid_offset; static int pid_size; + + return __parse_common(data, &pid_size, &pid_offset, + (char *)"common_pid"); +} + +static int parse_common_pc(void *data) +{ + static int pc_offset; + static int pc_size; + + return __parse_common(data, &pc_size, &pc_offset, + (char *)"common_preempt_count"); +} + +static int parse_common_flags(void *data) +{ + static int flags_offset; + static int flags_size; + + return __parse_common(data, &flags_size, &flags_offset, + (char *)"common_flags"); +} + +static int parse_common_lock_depth(void *data) +{ + static int ld_offset; + static int ld_size; int ret; - if (!pid_size) { - ret = get_common_info("common_pid", - &pid_offset, - &pid_size); - if (ret < 0) - return ret; - } + ret = __parse_common(data, &ld_size, &ld_offset, + (char *)"common_lock_depth"); + if (ret < 0) + return -1; - return read_size(data + pid_offset, pid_size); + return ret; } struct event *trace_find_event(int id) @@ -2525,6 +2557,41 @@ static inline int log10_cpu(int nb) return 1; } +static void print_lat_fmt(void *data, int size __unused) +{ + unsigned int lat_flags; + unsigned int pc; + int lock_depth; + int hardirq; + int softirq; + + lat_flags = parse_common_flags(data); + pc = parse_common_pc(data); + lock_depth = parse_common_lock_depth(data); + + hardirq = lat_flags & TRACE_FLAG_HARDIRQ; + softirq = lat_flags & TRACE_FLAG_SOFTIRQ; + + printf("%c%c%c", + (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' : + (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ? + 'X' : '.', + (lat_flags & TRACE_FLAG_NEED_RESCHED) ? + 'N' : '.', + (hardirq && softirq) ? 'H' : + hardirq ? 'h' : softirq ? 's' : '.'); + + if (pc) + printf("%x", pc); + else + printf("."); + + if (lock_depth < 0) + printf("."); + else + printf("%d", lock_depth); +} + /* taken from Linux, written by Frederic Weisbecker */ static void print_graph_cpu(int cpu) { @@ -2768,6 +2835,11 @@ pretty_print_func_ent(void *data, int size, struct event *event, printf(" | "); + if (latency_format) { + print_lat_fmt(data, size); + printf(" | "); + } + field = find_field(event, "func"); if (!field) die("function entry does not have func field"); @@ -2811,6 +2883,11 @@ pretty_print_func_ret(void *data, int size __unused, struct event *event, printf(" | "); + if (latency_format) { + print_lat_fmt(data, size); + printf(" | "); + } + field = find_field(event, "rettime"); if (!field) die("can't find rettime in return graph"); @@ -2882,9 +2959,14 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, return pretty_print_func_graph(data, size, event, cpu, pid, comm, secs, usecs); - printf("%16s-%-5d [%03d] %5lu.%09Lu: %s: ", - comm, pid, cpu, - secs, nsecs, event->name); + if (latency_format) { + printf("%8.8s-%-5d %3d", + comm, pid, cpu); + print_lat_fmt(data, size); + } else + printf("%16s-%-5d [%03d]", comm, pid, cpu); + + printf(" %5lu.%06lu: %s: ", secs, usecs, event->name); if (event->flags & EVENT_FL_FAILED) { printf("EVENT '%s' FAILED TO PARSE\n", diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 29821ac..f6637c2 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -239,6 +239,8 @@ extern int header_page_size_size; extern int header_page_data_offset; extern int header_page_data_size; +extern int latency_format; + int parse_header_page(char *buf, unsigned long size); int trace_parse_common_type(void *data); struct event *trace_find_event(int id); @@ -248,4 +250,13 @@ void *raw_field_ptr(struct event *event, const char *name, void *data); void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); +/* taken from kernel/trace/trace.h */ +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_IRQS_NOSUPPORT = 0x02, + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, +}; + #endif /* __PERF_TRACE_EVENTS_H */ -- cgit v0.10.2 From afdf1a404eed236d6f762ee44cc0f1dcc97206e0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:43 -0400 Subject: perf tools: Handle - and + in parsing trace print format The opterators '-' and '+' are not handled in the trace print format. To do: '++' and '--'. Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194400.330843045@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index fde1a43..2d424ff 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -2106,6 +2106,12 @@ static unsigned long long eval_num_arg(void *data, int size, die("unknown op '%s'", arg->op.op); val = left == right; break; + case '-': + val = left - right; + break; + case '+': + val = left + right; + break; default: die("unknown op '%s'", arg->op.op); } -- cgit v0.10.2 From c4dc775f53136cd6af8f88bce67cce9b42751768 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 14 Oct 2009 15:43:44 -0400 Subject: perf tools: Remove all char * typecasts and use const in prototype The (char *) for all the static strings was a fix for the symptom and not the disease. The real issue was that the function prototypes needed to be declared "const char *". Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091014194400.635935008@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 2d424ff..4b61b49 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -685,10 +685,10 @@ static char *event_read_name(void) { char *token; - if (read_expected(EVENT_ITEM, (char *)"name") < 0) + if (read_expected(EVENT_ITEM, "name") < 0) return NULL; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return NULL; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -706,10 +706,10 @@ static int event_read_id(void) char *token; int id; - if (read_expected_item(EVENT_ITEM, (char *)"ID") < 0) + if (read_expected_item(EVENT_ITEM, "ID") < 0) return -1; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return -1; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -759,7 +759,7 @@ static int event_read_fields(struct event *event, struct format_field **fields) count++; - if (test_type_token(type, token, EVENT_ITEM, (char *)"field")) + if (test_type_token(type, token, EVENT_ITEM, "field")) goto fail; free_token(token); @@ -774,7 +774,7 @@ static int event_read_fields(struct event *event, struct format_field **fields) type = read_token(&token); } - if (test_type_token(type, token, EVENT_OP, (char *)":") < 0) + if (test_type_token(type, token, EVENT_OP, ":") < 0) return -1; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -892,14 +892,14 @@ static int event_read_fields(struct event *event, struct format_field **fields) field->flags |= FIELD_IS_DYNAMIC; } - if (test_type_token(type, token, EVENT_OP, (char *)";")) + if (test_type_token(type, token, EVENT_OP, ";")) goto fail; free_token(token); - if (read_expected(EVENT_ITEM, (char *)"offset") < 0) + if (read_expected(EVENT_ITEM, "offset") < 0) goto fail_expect; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) goto fail_expect; if (read_expect_type(EVENT_ITEM, &token)) @@ -907,13 +907,13 @@ static int event_read_fields(struct event *event, struct format_field **fields) field->offset = strtoul(token, NULL, 0); free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) goto fail_expect; - if (read_expected(EVENT_ITEM, (char *)"size") < 0) + if (read_expected(EVENT_ITEM, "size") < 0) goto fail_expect; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) goto fail_expect; if (read_expect_type(EVENT_ITEM, &token)) @@ -921,18 +921,18 @@ static int event_read_fields(struct event *event, struct format_field **fields) field->size = strtoul(token, NULL, 0); free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) goto fail_expect; type = read_token(&token); if (type != EVENT_NEWLINE) { /* newer versions of the kernel have a "signed" type */ - if (test_type_token(type, token, EVENT_ITEM, (char *)"signed")) + if (test_type_token(type, token, EVENT_ITEM, "signed")) goto fail; free_token(token); - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) goto fail_expect; if (read_expect_type(EVENT_ITEM, &token)) @@ -941,7 +941,7 @@ static int event_read_fields(struct event *event, struct format_field **fields) /* add signed type */ free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) goto fail_expect; if (read_expect_type(EVENT_NEWLINE, &token)) @@ -970,10 +970,10 @@ static int event_read_format(struct event *event) char *token; int ret; - if (read_expected_item(EVENT_ITEM, (char *)"format") < 0) + if (read_expected_item(EVENT_ITEM, "format") < 0) return -1; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return -1; if (read_expect_type(EVENT_NEWLINE, &token)) @@ -1033,7 +1033,7 @@ process_cond(struct event *event, struct print_arg *top, char **tok) *tok = NULL; type = process_arg(event, left, &token); - if (test_type_token(type, token, EVENT_OP, (char *)":")) + if (test_type_token(type, token, EVENT_OP, ":")) goto out_free; arg->op.op = token; @@ -1065,7 +1065,7 @@ process_array(struct event *event, struct print_arg *top, char **tok) *tok = NULL; type = process_arg(event, arg, &token); - if (test_type_token(type, token, EVENT_OP, (char *)"]")) + if (test_type_token(type, token, EVENT_OP, "]")) goto out_free; top->op.right = arg; @@ -1287,7 +1287,7 @@ process_entry(struct event *event __unused, struct print_arg *arg, char *field; char *token; - if (read_expected(EVENT_OP, (char *)"->") < 0) + if (read_expected(EVENT_OP, "->") < 0) return EVENT_ERROR; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -1447,14 +1447,14 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok) do { free_token(token); type = read_token_item(&token); - if (test_type_token(type, token, EVENT_OP, (char *)"{")) + if (test_type_token(type, token, EVENT_OP, "{")) break; arg = malloc_or_die(sizeof(*arg)); free_token(token); type = process_arg(event, arg, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; field = malloc_or_die(sizeof(*field)); @@ -1465,7 +1465,7 @@ process_fields(struct event *event, struct print_flag_sym **list, char **tok) free_token(token); type = process_arg(event, arg, &token); - if (test_type_token(type, token, EVENT_OP, (char *)"}")) + if (test_type_token(type, token, EVENT_OP, "}")) goto out_free; value = arg_eval(arg); @@ -1500,13 +1500,13 @@ process_flags(struct event *event, struct print_arg *arg, char **tok) memset(arg, 0, sizeof(*arg)); arg->type = PRINT_FLAGS; - if (read_expected_item(EVENT_DELIM, (char *)"(") < 0) + if (read_expected_item(EVENT_DELIM, "(") < 0) return EVENT_ERROR; field = malloc_or_die(sizeof(*field)); type = process_arg(event, field, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; arg->flags.field = field; @@ -1517,11 +1517,11 @@ process_flags(struct event *event, struct print_arg *arg, char **tok) type = read_token_item(&token); } - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; type = process_fields(event, &arg->flags.flags, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)")")) + if (test_type_token(type, token, EVENT_DELIM, ")")) goto out_free; free_token(token); @@ -1543,19 +1543,19 @@ process_symbols(struct event *event, struct print_arg *arg, char **tok) memset(arg, 0, sizeof(*arg)); arg->type = PRINT_SYMBOL; - if (read_expected_item(EVENT_DELIM, (char *)"(") < 0) + if (read_expected_item(EVENT_DELIM, "(") < 0) return EVENT_ERROR; field = malloc_or_die(sizeof(*field)); type = process_arg(event, field, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + if (test_type_token(type, token, EVENT_DELIM, ",")) goto out_free; arg->symbol.field = field; type = process_fields(event, &arg->symbol.symbols, &token); - if (test_type_token(type, token, EVENT_DELIM, (char *)")")) + if (test_type_token(type, token, EVENT_DELIM, ")")) goto out_free; free_token(token); @@ -1585,7 +1585,7 @@ process_paren(struct event *event, struct print_arg *arg, char **tok) if (type == EVENT_ERROR) return EVENT_ERROR; - if (test_type_token(type, token, EVENT_DELIM, (char *)")")) { + if (test_type_token(type, token, EVENT_DELIM, ")")) { free_token(token); return EVENT_ERROR; } @@ -1626,7 +1626,7 @@ process_str(struct event *event __unused, struct print_arg *arg, char **tok) enum event_type type; char *token; - if (read_expected(EVENT_DELIM, (char *)"(") < 0) + if (read_expected(EVENT_DELIM, "(") < 0) return EVENT_ERROR; if (read_expect_type(EVENT_ITEM, &token) < 0) @@ -1636,7 +1636,7 @@ process_str(struct event *event __unused, struct print_arg *arg, char **tok) arg->string.string = token; arg->string.offset = -1; - if (read_expected(EVENT_DELIM, (char *)")") < 0) + if (read_expected(EVENT_DELIM, ")") < 0) return EVENT_ERROR; type = read_token(&token); @@ -1775,13 +1775,13 @@ static int event_read_print(struct event *event) char *token; int ret; - if (read_expected_item(EVENT_ITEM, (char *)"print") < 0) + if (read_expected_item(EVENT_ITEM, "print") < 0) return -1; - if (read_expected(EVENT_ITEM, (char *)"fmt") < 0) + if (read_expected(EVENT_ITEM, "fmt") < 0) return -1; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return -1; if (read_expect_type(EVENT_DQUOTE, &token) < 0) @@ -1811,8 +1811,8 @@ static int event_read_print(struct event *event) token = cat; goto concat; } - - if (test_type_token(type, token, EVENT_DELIM, (char *)",")) + + if (test_type_token(type, token, EVENT_DELIM, ",")) goto fail; free_token(token); @@ -1931,7 +1931,7 @@ static int get_common_info(const char *type, int *offset, int *size) } static int __parse_common(void *data, int *size, int *offset, - char *name) + const char *name) { int ret; @@ -1949,7 +1949,7 @@ int trace_parse_common_type(void *data) static int type_size; return __parse_common(data, &type_size, &type_offset, - (char *)"common_type"); + "common_type"); } static int parse_common_pid(void *data) @@ -1958,7 +1958,7 @@ static int parse_common_pid(void *data) static int pid_size; return __parse_common(data, &pid_size, &pid_offset, - (char *)"common_pid"); + "common_pid"); } static int parse_common_pc(void *data) @@ -1967,7 +1967,7 @@ static int parse_common_pc(void *data) static int pc_size; return __parse_common(data, &pc_size, &pc_offset, - (char *)"common_preempt_count"); + "common_preempt_count"); } static int parse_common_flags(void *data) @@ -1976,7 +1976,7 @@ static int parse_common_flags(void *data) static int flags_size; return __parse_common(data, &flags_size, &flags_offset, - (char *)"common_flags"); + "common_flags"); } static int parse_common_lock_depth(void *data) @@ -1986,7 +1986,7 @@ static int parse_common_lock_depth(void *data) int ret; ret = __parse_common(data, &ld_size, &ld_offset, - (char *)"common_lock_depth"); + "common_lock_depth"); if (ret < 0) return -1; @@ -3049,15 +3049,15 @@ static void print_args(struct print_arg *args) } } -static void parse_header_field(char *field, +static void parse_header_field(const char *field, int *offset, int *size) { char *token; int type; - if (read_expected(EVENT_ITEM, (char *)"field") < 0) + if (read_expected(EVENT_ITEM, "field") < 0) return; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return; /* type */ @@ -3067,27 +3067,27 @@ static void parse_header_field(char *field, if (read_expected(EVENT_ITEM, field) < 0) return; - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) return; - if (read_expected(EVENT_ITEM, (char *)"offset") < 0) + if (read_expected(EVENT_ITEM, "offset") < 0) return; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return; if (read_expect_type(EVENT_ITEM, &token) < 0) goto fail; *offset = atoi(token); free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) return; - if (read_expected(EVENT_ITEM, (char *)"size") < 0) + if (read_expected(EVENT_ITEM, "size") < 0) return; - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return; if (read_expect_type(EVENT_ITEM, &token) < 0) goto fail; *size = atoi(token); free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) return; type = read_token(&token); if (type != EVENT_NEWLINE) { @@ -3095,19 +3095,19 @@ static void parse_header_field(char *field, if (type != EVENT_ITEM) goto fail; - if (strcmp(token, (char *)"signed") != 0) + if (strcmp(token, "signed") != 0) goto fail; free_token(token); - if (read_expected(EVENT_OP, (char *)":") < 0) + if (read_expected(EVENT_OP, ":") < 0) return; if (read_expect_type(EVENT_ITEM, &token)) goto fail; free_token(token); - if (read_expected(EVENT_OP, (char *)";") < 0) + if (read_expected(EVENT_OP, ";") < 0) return; if (read_expect_type(EVENT_NEWLINE, &token)) @@ -3121,11 +3121,11 @@ int parse_header_page(char *buf, unsigned long size) { init_input_buf(buf, size); - parse_header_field((char *)"timestamp", &header_page_ts_offset, + parse_header_field("timestamp", &header_page_ts_offset, &header_page_ts_size); - parse_header_field((char *)"commit", &header_page_size_offset, + parse_header_field("commit", &header_page_size_offset, &header_page_size_size); - parse_header_field((char *)"data", &header_page_data_offset, + parse_header_field("data", &header_page_data_offset, &header_page_data_size); return 0; -- cgit v0.10.2 From 3397e040dfacbb303498ced1baa96be983dcea06 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 14 Oct 2009 16:36:38 -0700 Subject: rcu: Add rnp->blocked_tasks to tracing Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: npiggin@suse.de Cc: jens.axboe@oracle.com Cc: Josh Triplett LKML-Reference: <20091014233638.GE6763@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar kernel/rcutree_trace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 4b31c77..1984cdc 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -155,12 +155,14 @@ static const struct file_operations rcudata_csv_fops = { static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) { + long gpnum; int level = 0; struct rcu_node *rnp; + gpnum = rsp->gpnum; seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x " "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n", - rsp->completed, rsp->gpnum, rsp->signaled, + rsp->completed, gpnum, rsp->signaled, (long)(rsp->jiffies_force_qs - jiffies), (int)(jiffies & 0xffff), rsp->n_force_qs, rsp->n_force_qs_ngp, @@ -171,8 +173,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) seq_puts(m, "\n"); level = rnp->level; } - seq_printf(m, "%lx/%lx %d:%d ^%d ", + seq_printf(m, "%lx/%lx %c>%c %d:%d ^%d ", rnp->qsmask, rnp->qsmaskinit, + "T."[list_empty(&rnp->blocked_tasks[gpnum & 1])], + "T."[list_empty(&rnp->blocked_tasks[!(gpnum & 1)])], rnp->grplo, rnp->grphi, rnp->grpnum); } seq_puts(m, "\n"); -- cgit v0.10.2 From bd58b430039435e4c981cf802b5b11d511d73abd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 14 Oct 2009 10:15:54 -0700 Subject: rcu: Update trace.txt documentation to reflect recent changes o Remove the CONFIG_PREEMPT_RCU documentation since this config option has now been removed. o Change the now-incorrect references to "rcu" labels to instead be "rcu_sched". o Add notes stating that CONFIG_TREE_PREEMPT_RCU kernels will have additional "rcu_preempt" output. o Note the new "oqlen" field in the rcuhier output (for RCU callbacks orphaned by an offlined CPU). Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: npiggin@suse.de Cc: jens.axboe@oracle.com LKML-Reference: <1255540559799-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 187bbf1..c1a9550 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -1,185 +1,10 @@ CONFIG_RCU_TRACE debugfs Files and Formats -The rcupreempt and rcutree implementations of RCU provide debugfs trace -output that summarizes counters and state. This information is useful for -debugging RCU itself, and can sometimes also help to debug abuses of RCU. -Note that the rcuclassic implementation of RCU does not provide debugfs -trace output. - -The following sections describe the debugfs files and formats for -preemptable RCU (rcupreempt) and hierarchical RCU (rcutree). - - -Preemptable RCU debugfs Files and Formats - -This implementation of RCU provides three debugfs files under the -top-level directory RCU: rcu/rcuctrs (which displays the per-CPU -counters used by preemptable RCU) rcu/rcugp (which displays grace-period -counters), and rcu/rcustats (which internal counters for debugging RCU). - -The output of "cat rcu/rcuctrs" looks as follows: - -CPU last cur F M - 0 5 -5 0 0 - 1 -1 0 0 0 - 2 0 1 0 0 - 3 0 1 0 0 - 4 0 1 0 0 - 5 0 1 0 0 - 6 0 2 0 0 - 7 0 -1 0 0 - 8 0 1 0 0 -ggp = 26226, state = waitzero - -The per-CPU fields are as follows: - -o "CPU" gives the CPU number. Offline CPUs are not displayed. - -o "last" gives the value of the counter that is being decremented - for the current grace period phase. In the example above, - the counters sum to 4, indicating that there are still four - RCU read-side critical sections still running that started - before the last counter flip. - -o "cur" gives the value of the counter that is currently being - both incremented (by rcu_read_lock()) and decremented (by - rcu_read_unlock()). In the example above, the counters sum to - 1, indicating that there is only one RCU read-side critical section - still running that started after the last counter flip. - -o "F" indicates whether RCU is waiting for this CPU to acknowledge - a counter flip. In the above example, RCU is not waiting on any, - which is consistent with the state being "waitzero" rather than - "waitack". - -o "M" indicates whether RCU is waiting for this CPU to execute a - memory barrier. In the above example, RCU is not waiting on any, - which is consistent with the state being "waitzero" rather than - "waitmb". - -o "ggp" is the global grace-period counter. - -o "state" is the RCU state, which can be one of the following: - - o "idle": there is no grace period in progress. - - o "waitack": RCU just incremented the global grace-period - counter, which has the effect of reversing the roles of - the "last" and "cur" counters above, and is waiting for - all the CPUs to acknowledge the flip. Once the flip has - been acknowledged, CPUs will no longer be incrementing - what are now the "last" counters, so that their sum will - decrease monotonically down to zero. - - o "waitzero": RCU is waiting for the sum of the "last" counters - to decrease to zero. - - o "waitmb": RCU is waiting for each CPU to execute a memory - barrier, which ensures that instructions from a given CPU's - last RCU read-side critical section cannot be reordered - with instructions following the memory-barrier instruction. - -The output of "cat rcu/rcugp" looks as follows: - -oldggp=48870 newggp=48873 - -Note that reading from this file provokes a synchronize_rcu(). The -"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before -executing the synchronize_rcu(), and the "newggp" value is also the -"ggp" value, but taken after the synchronize_rcu() command returns. - - -The output of "cat rcu/rcugp" looks as follows: - -na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871 -1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640 -z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639 - -These are counters tracking internal preemptable-RCU events, however, -some of them may be useful for debugging algorithms using RCU. In -particular, the "nl", "wl", and "dl" values track the number of RCU -callbacks in various states. The fields are as follows: - -o "na" is the total number of RCU callbacks that have been enqueued - since boot. - -o "nl" is the number of RCU callbacks waiting for the previous - grace period to end so that they can start waiting on the next - grace period. - -o "wa" is the total number of RCU callbacks that have started waiting - for a grace period since boot. "na" should be roughly equal to - "nl" plus "wa". - -o "wl" is the number of RCU callbacks currently waiting for their - grace period to end. - -o "da" is the total number of RCU callbacks whose grace periods - have completed since boot. "wa" should be roughly equal to - "wl" plus "da". - -o "dr" is the total number of RCU callbacks that have been removed - from the list of callbacks ready to invoke. "dr" should be roughly - equal to "da". - -o "di" is the total number of RCU callbacks that have been invoked - since boot. "di" should be roughly equal to "da", though some - early versions of preemptable RCU had a bug so that only the - last CPU's count of invocations was displayed, rather than the - sum of all CPU's counts. - -o "1" is the number of calls to rcu_try_flip(). This should be - roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1" - described below. In other words, the number of times that - the state machine is visited should be equal to the sum of the - number of times that each state is visited plus the number of - times that the state-machine lock acquisition failed. - -o "e1" is the number of times that rcu_try_flip() was unable to - acquire the fliplock. - -o "i1" is the number of calls to rcu_try_flip_idle(). - -o "ie1" is the number of times rcu_try_flip_idle() exited early - due to the calling CPU having no work for RCU. - -o "g1" is the number of times that rcu_try_flip_idle() decided - to start a new grace period. "i1" should be roughly equal to - "ie1" plus "g1". - -o "a1" is the number of calls to rcu_try_flip_waitack(). - -o "ae1" is the number of times that rcu_try_flip_waitack() found - that at least one CPU had not yet acknowledge the new grace period - (AKA "counter flip"). - -o "a2" is the number of time rcu_try_flip_waitack() found that - all CPUs had acknowledged. "a1" should be roughly equal to - "ae1" plus "a2". (This particular output was collected on - a 128-CPU machine, hence the smaller-than-usual fraction of - calls to rcu_try_flip_waitack() finding all CPUs having already - acknowledged.) - -o "z1" is the number of calls to rcu_try_flip_waitzero(). - -o "ze1" is the number of times that rcu_try_flip_waitzero() found - that not all of the old RCU read-side critical sections had - completed. - -o "z2" is the number of times that rcu_try_flip_waitzero() finds - the sum of the counters equal to zero, in other words, that - all of the old RCU read-side critical sections had completed. - The value of "z1" should be roughly equal to "ze1" plus - "z2". - -o "m1" is the number of calls to rcu_try_flip_waitmb(). - -o "me1" is the number of times that rcu_try_flip_waitmb() finds - that at least one CPU has not yet executed a memory barrier. - -o "m2" is the number of times that rcu_try_flip_waitmb() finds that - all CPUs have executed a memory barrier. +The rcutree implementation of RCU provides debugfs trace output that +summarizes counters and state. This information is useful for debugging +RCU itself, and can sometimes also help to debug abuses of RCU. +The following sections describe the debugfs files and formats. Hierarchical RCU debugfs Files and Formats @@ -210,9 +35,10 @@ rcu_bh: 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 -The first section lists the rcu_data structures for rcu, the second for -rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. -The fields are as follows: +The first section lists the rcu_data structures for rcu_sched, the second +for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an +additional section for rcu_preempt. Each section has one line per CPU, +or eight for this 8-CPU system. The fields are as follows: o The number at the beginning of each line is the CPU number. CPUs numbers followed by an exclamation mark are offline, @@ -223,9 +49,9 @@ o The number at the beginning of each line is the CPU number. o "c" is the count of grace periods that this CPU believes have completed. CPUs in dynticks idle mode may lag quite a ways - behind, for example, CPU 4 under "rcu" above, which has slept - through the past 25 RCU grace periods. It is not unusual to - see CPUs lagging by thousands of grace periods. + behind, for example, CPU 4 under "rcu_sched" above, which has + slept through the past 25 RCU grace periods. It is not unusual + to see CPUs lagging by thousands of grace periods. o "g" is the count of grace periods that this CPU believes have started. Again, CPUs in dynticks idle mode may lag behind. @@ -308,8 +134,10 @@ The output of "cat rcu/rcugp" looks as follows: rcu_sched: completed=33062 gpnum=33063 rcu_bh: completed=464 gpnum=464 -Again, this output is for both "rcu" and "rcu_bh". The fields are -taken from the rcu_state structure, and are as follows: +Again, this output is for both "rcu_sched" and "rcu_bh". Note that +kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional +"rcu_preempt" line. The fields are taken from the rcu_state structure, +and are as follows: o "completed" is the number of grace periods that have completed. It is comparable to the "c" field from rcu/rcudata in that a @@ -324,23 +152,24 @@ o "gpnum" is the number of grace periods that have started. It is If these two fields are equal (as they are for "rcu_bh" above), then there is no grace period in progress, in other words, RCU is idle. On the other hand, if the two fields differ (as they - do for "rcu" above), then an RCU grace period is in progress. + do for "rcu_sched" above), then an RCU grace period is in progress. The output of "cat rcu/rcuhier" looks as follows, with very long lines: -c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 +c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0 1/1 0:127 ^0 3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 rcu_bh: -c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 +c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0 0/1 0:127 ^0 0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 -This is once again split into "rcu" and "rcu_bh" portions. The fields are -as follows: +This is once again split into "rcu_sched" and "rcu_bh" portions, +and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional +"rcu_preempt" section. The fields are as follows: o "c" is exactly the same as "completed" under rcu/rcugp. @@ -372,6 +201,11 @@ o "fqlh" is the number of calls to force_quiescent_state() that exited immediately (without even being counted in nfqs above) due to contention on ->fqslock. +o "oqlen" is the number of callbacks on the "orphan" callback + list. RCU callbacks are placed on this list by CPUs going + offline, and are "adopted" either by the CPU helping the outgoing + CPU or by the next rcu_barrier*() call, whichever comes first. + o Each element of the form "1/1 0:127 ^0" represents one struct rcu_node. Each line represents one level of the hierarchy, from root to leaves. It is best to think of the rcu_data structures @@ -389,10 +223,10 @@ o Each element of the form "1/1 0:127 ^0" represents one struct The value of qsmaskinit is assigned to that of qsmask at the beginning of each grace period. - For example, for "rcu", the qsmask of the first entry - of the lowest level is 0x14, meaning that we are still - waiting for CPUs 2 and 4 to check in for the current - grace period. + For example, for "rcu_sched", the qsmask of the first + entry of the lowest level is 0x14, meaning that we + are still waiting for CPUs 2 and 4 to check in for the + current grace period. o The numbers separated by the ":" are the range of CPUs served by this struct rcu_node. This can be helpful @@ -431,8 +265,9 @@ rcu_bh: 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 -As always, this is once again split into "rcu" and "rcu_bh" portions. -The fields are as follows: +As always, this is once again split into "rcu_sched" and "rcu_bh" +portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional +"rcu_preempt" section. The fields are as follows: o "np" is the number of times that __rcu_pending() has been invoked for the corresponding flavor of RCU. -- cgit v0.10.2 From 0edf1a683e499191b27a067956ae9f5fa6e046c6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 14 Oct 2009 10:15:59 -0700 Subject: rcu: Update trace.txt documentation for blocked-tasks lists Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: npiggin@suse.de Cc: jens.axboe@oracle.com LKML-Reference: <12555405592804-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index c1a9550..8608fd8 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -158,14 +158,14 @@ o "gpnum" is the number of grace periods that have started. It is The output of "cat rcu/rcuhier" looks as follows, with very long lines: c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0 -1/1 0:127 ^0 -3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 -3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 +1/1 .>. 0:127 ^0 +3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 +3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 rcu_bh: c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0 -0/1 0:127 ^0 -0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 -0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 +0/1 .>. 0:127 ^0 +0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 +0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 This is once again split into "rcu_sched" and "rcu_bh" portions, and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional @@ -213,7 +213,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct might be either one, two, or three levels of rcu_node structures, depending on the relationship between CONFIG_RCU_FANOUT and CONFIG_NR_CPUS. - + o The numbers separated by the "/" are the qsmask followed by the qsmaskinit. The qsmask will have one bit set for each entity in the next lower level that @@ -228,6 +228,15 @@ o Each element of the form "1/1 0:127 ^0" represents one struct are still waiting for CPUs 2 and 4 to check in for the current grace period. + o The characters separated by the ">" indicate the state + of the blocked-tasks lists. A "T" preceding the ">" + indicates that at least one task blocked in an RCU + read-side critical section blocks the current grace + period, while a "." preceding the ">" indicates otherwise. + The character following the ">" indicates similarly for + the next grace period. A "T" should appear in this + field only for rcu-preempt. + o The numbers separated by the ":" are the range of CPUs served by this struct rcu_node. This can be helpful in working out how the hierarchy is wired together. -- cgit v0.10.2 From fce29d15b59245597f7f320db4a9f2be0f5fb512 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Oct 2009 11:20:34 +0800 Subject: tracing/filters: Refactor subsystem filter code Change: for_each_pred for_each_subsystem To: for_each_subsystem for_each_pred This change also prepares for later patches. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4AD69502.8060903@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index acef8b4..6286145 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -683,7 +683,6 @@ struct event_filter { int n_preds; struct filter_pred **preds; char *filter_string; - bool no_reset; }; struct event_subsystem { diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9267201..9c4f9a0 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -615,14 +615,7 @@ static int init_subsystem_preds(struct event_subsystem *system) return 0; } -enum { - FILTER_DISABLE_ALL, - FILTER_INIT_NO_RESET, - FILTER_SKIP_NO_RESET, -}; - -static void filter_free_subsystem_preds(struct event_subsystem *system, - int flag) +static void filter_free_subsystem_preds(struct event_subsystem *system) { struct ftrace_event_call *call; @@ -633,14 +626,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, if (strcmp(call->system, system->name) != 0) continue; - if (flag == FILTER_INIT_NO_RESET) { - call->filter->no_reset = false; - continue; - } - - if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset) - continue; - filter_disable_preds(call); remove_filter_string(call->filter); } @@ -817,44 +802,6 @@ add_pred_fn: return 0; } -static int filter_add_subsystem_pred(struct filter_parse_state *ps, - struct event_subsystem *system, - struct filter_pred *pred, - char *filter_string, - bool dry_run) -{ - struct ftrace_event_call *call; - int err = 0; - bool fail = true; - - list_for_each_entry(call, &ftrace_events, list) { - - if (!call->define_fields) - continue; - - if (strcmp(call->system, system->name)) - continue; - - if (call->filter->no_reset) - continue; - - err = filter_add_pred(ps, call, pred, dry_run); - if (err) - call->filter->no_reset = true; - else - fail = false; - - if (!dry_run) - replace_filter_string(call->filter, filter_string); - } - - if (fail) { - parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); - return err; - } - return 0; -} - static void parse_init(struct filter_parse_state *ps, struct filter_op *ops, char *infix_string) @@ -1209,8 +1156,7 @@ static int check_preds(struct filter_parse_state *ps) return 0; } -static int replace_preds(struct event_subsystem *system, - struct ftrace_event_call *call, +static int replace_preds(struct ftrace_event_call *call, struct filter_parse_state *ps, char *filter_string, bool dry_run) @@ -1257,11 +1203,7 @@ static int replace_preds(struct event_subsystem *system, add_pred: if (!pred) return -ENOMEM; - if (call) - err = filter_add_pred(ps, call, pred, false); - else - err = filter_add_subsystem_pred(ps, system, pred, - filter_string, dry_run); + err = filter_add_pred(ps, call, pred, dry_run); filter_free_pred(pred); if (err) return err; @@ -1272,6 +1214,44 @@ add_pred: return 0; } +static int replace_system_preds(struct event_subsystem *system, + struct filter_parse_state *ps, + char *filter_string) +{ + struct ftrace_event_call *call; + int err; + bool fail = true; + + list_for_each_entry(call, &ftrace_events, list) { + + if (!call->define_fields) + continue; + + if (strcmp(call->system, system->name) != 0) + continue; + + /* try to see if the filter can be applied */ + err = replace_preds(call, ps, filter_string, true); + if (err) + continue; + + /* really apply the filter */ + filter_disable_preds(call); + err = replace_preds(call, ps, filter_string, false); + if (err) + filter_disable_preds(call); + else + replace_filter_string(call->filter, filter_string); + fail = false; + } + + if (fail) { + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); + return err; + } + return 0; +} + int apply_event_filter(struct ftrace_event_call *call, char *filter_string) { int err; @@ -1306,7 +1286,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out; } - err = replace_preds(NULL, call, ps, filter_string, false); + err = replace_preds(call, ps, filter_string, false); if (err) append_filter_err(ps, call->filter); @@ -1334,7 +1314,7 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out_unlock; if (!strcmp(strstrip(filter_string), "0")) { - filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); + filter_free_subsystem_preds(system); remove_filter_string(system->filter); mutex_unlock(&event_mutex); return 0; @@ -1354,23 +1334,9 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out; } - filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); - - /* try to see the filter can be applied to which events */ - err = replace_preds(system, NULL, ps, filter_string, true); - if (err) { - append_filter_err(ps, system->filter); - goto out; - } - - filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); - - /* really apply the filter to the events */ - err = replace_preds(system, NULL, ps, filter_string, false); - if (err) { + err = replace_system_preds(system, ps, filter_string); + if (err) append_filter_err(ps, system->filter); - filter_free_subsystem_preds(system, 2); - } out: filter_opstack_clear(ps); -- cgit v0.10.2 From b0f1a59a98d7ac2102e7e4f22904c26d564a5628 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Oct 2009 11:21:12 +0800 Subject: tracing/filters: Use a different op for glob match "==" will always do a full match, and "~" will do a glob match. In the future, we may add "=~" for regex match. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4AD69528.3050309@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6286145..ffe53dd 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -702,7 +702,7 @@ typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event, typedef int (*regex_match_func)(char *str, struct regex *r, int len); enum regex_type { - MATCH_FULL, + MATCH_FULL = 0, MATCH_FRONT_ONLY, MATCH_MIDDLE_ONLY, MATCH_END_ONLY, diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9c4f9a0..273845f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -29,6 +29,7 @@ enum filter_op_ids { OP_OR, OP_AND, + OP_GLOB, OP_NE, OP_EQ, OP_LT, @@ -46,16 +47,17 @@ struct filter_op { }; static struct filter_op filter_ops[] = { - { OP_OR, "||", 1 }, - { OP_AND, "&&", 2 }, - { OP_NE, "!=", 4 }, - { OP_EQ, "==", 4 }, - { OP_LT, "<", 5 }, - { OP_LE, "<=", 5 }, - { OP_GT, ">", 5 }, - { OP_GE, ">=", 5 }, - { OP_NONE, "OP_NONE", 0 }, - { OP_OPEN_PAREN, "(", 0 }, + { OP_OR, "||", 1 }, + { OP_AND, "&&", 2 }, + { OP_GLOB, "~", 4 }, + { OP_NE, "!=", 4 }, + { OP_EQ, "==", 4 }, + { OP_LT, "<", 5 }, + { OP_LE, "<=", 5 }, + { OP_GT, ">", 5 }, + { OP_GE, ">=", 5 }, + { OP_NONE, "OP_NONE", 0 }, + { OP_OPEN_PAREN, "(", 0 }, }; enum { @@ -329,22 +331,18 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not) return type; } -static int filter_build_regex(struct filter_pred *pred) +static void filter_build_regex(struct filter_pred *pred) { struct regex *r = &pred->regex; - char *search, *dup; - enum regex_type type; - int not; - - type = filter_parse_regex(r->pattern, r->len, &search, ¬); - dup = kstrdup(search, GFP_KERNEL); - if (!dup) - return -ENOMEM; - - strcpy(r->pattern, dup); - kfree(dup); - - r->len = strlen(r->pattern); + char *search; + enum regex_type type = MATCH_FULL; + int not = 0; + + if (pred->op == OP_GLOB) { + type = filter_parse_regex(r->pattern, r->len, &search, ¬); + r->len = strlen(search); + memmove(r->pattern, search, r->len+1); + } switch (type) { case MATCH_FULL: @@ -362,8 +360,6 @@ static int filter_build_regex(struct filter_pred *pred) } pred->not ^= not; - - return 0; } /* return 1 if event matches, 0 otherwise (discard) */ @@ -676,7 +672,10 @@ static bool is_string_field(struct ftrace_event_field *field) static int is_legal_op(struct ftrace_event_field *field, int op) { - if (is_string_field(field) && (op != OP_EQ && op != OP_NE)) + if (is_string_field(field) && + (op != OP_EQ && op != OP_NE && op != OP_GLOB)) + return 0; + if (!is_string_field(field) && op == OP_GLOB) return 0; return 1; @@ -761,15 +760,13 @@ static int filter_add_pred(struct filter_parse_state *ps, } if (is_string_field(field)) { - ret = filter_build_regex(pred); - if (ret) - return ret; + filter_build_regex(pred); if (field->filter_type == FILTER_STATIC_STRING) { fn = filter_pred_string; pred->regex.field_len = field->size; } else if (field->filter_type == FILTER_DYN_STRING) - fn = filter_pred_strloc; + fn = filter_pred_strloc; else { fn = filter_pred_pchar; pred->regex.field_len = strlen(pred->regex.pattern); -- cgit v0.10.2 From 6fb2915df7f0747d9044da9dbff5b46dc2e20830 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Oct 2009 11:21:42 +0800 Subject: tracing/profile: Add filter support - Add an ioctl to allocate a filter for a perf event. - Free the filter when the associated perf event is to be freed. - Do the filtering in perf_swevent_match(). Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4AD69546.8050401@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 4ec5e67..d117704 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -144,7 +144,7 @@ extern char *trace_profile_buf_nmi; #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ extern void destroy_preds(struct ftrace_event_call *call); -extern int filter_match_preds(struct ftrace_event_call *call, void *rec); +extern int filter_match_preds(struct event_filter *filter, void *rec); extern int filter_current_check_discard(struct ring_buffer *buffer, struct ftrace_event_call *call, void *rec, @@ -186,4 +186,13 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) +#ifdef CONFIG_EVENT_PROFILE +struct perf_event; +extern int ftrace_profile_enable(int event_id); +extern void ftrace_profile_disable(int event_id); +extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str); +extern void ftrace_profile_free_filter(struct perf_event *event); +#endif + #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7b7fbf4..91a2b43 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -225,6 +225,7 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) #define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_COUNTER_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f..df9d964 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -221,6 +221,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_RESET _IO ('$', 3) #define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -633,7 +634,12 @@ struct perf_event { struct pid_namespace *ns; u64 id; + +#ifdef CONFIG_EVENT_PROFILE + struct event_filter *filter; #endif + +#endif /* CONFIG_PERF_EVENTS */ }; /** diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9d0b5c6..12b5ec3 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -1658,6 +1659,8 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) return ERR_PTR(err); } +static void perf_event_free_filter(struct perf_event *event); + static void free_event_rcu(struct rcu_head *head) { struct perf_event *event; @@ -1665,6 +1668,7 @@ static void free_event_rcu(struct rcu_head *head) event = container_of(head, struct perf_event, rcu_head); if (event->ns) put_pid_ns(event->ns); + perf_event_free_filter(event); kfree(event); } @@ -1974,7 +1978,8 @@ unlock: return ret; } -int perf_event_set_output(struct perf_event *event, int output_fd); +static int perf_event_set_output(struct perf_event *event, int output_fd); +static int perf_event_set_filter(struct perf_event *event, void __user *arg); static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2002,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_EVENT_IOC_SET_OUTPUT: return perf_event_set_output(event, arg); + case PERF_EVENT_IOC_SET_FILTER: + return perf_event_set_filter(event, (void __user *)arg); + default: return -ENOTTY; } @@ -3806,9 +3814,14 @@ static int perf_swevent_is_counting(struct perf_event *event) return 1; } +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data); + static int perf_swevent_match(struct perf_event *event, enum perf_type_id type, - u32 event_id, struct pt_regs *regs) + u32 event_id, + struct perf_sample_data *data, + struct pt_regs *regs) { if (!perf_swevent_is_counting(event)) return 0; @@ -3826,6 +3839,10 @@ static int perf_swevent_match(struct perf_event *event, return 0; } + if (event->attr.type == PERF_TYPE_TRACEPOINT && + !perf_tp_event_match(event, data)) + return 0; + return 1; } @@ -3842,7 +3859,7 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { - if (perf_swevent_match(event, type, event_id, regs)) + if (perf_swevent_match(event, type, event_id, data, regs)) perf_swevent_add(event, nr, nmi, data, regs); } rcu_read_unlock(); @@ -4086,6 +4103,7 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE + void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size) { @@ -4109,8 +4127,15 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, } EXPORT_SYMBOL_GPL(perf_tp_event); -extern int ftrace_profile_enable(int); -extern void ftrace_profile_disable(int); +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data) +{ + void *record = data->raw->data; + + if (likely(!event->filter) || filter_match_preds(event->filter, record)) + return 1; + return 0; +} static void tp_perf_event_destroy(struct perf_event *event) { @@ -4135,12 +4160,53 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) return &perf_ops_generic; } + +static int perf_event_set_filter(struct perf_event *event, void __user *arg) +{ + char *filter_str; + int ret; + + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -EINVAL; + + filter_str = strndup_user(arg, PAGE_SIZE); + if (IS_ERR(filter_str)) + return PTR_ERR(filter_str); + + ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); + + kfree(filter_str); + return ret; +} + +static void perf_event_free_filter(struct perf_event *event) +{ + ftrace_profile_free_filter(event); +} + #else + +static int perf_tp_event_match(struct perf_event *event, + struct perf_sample_data *data) +{ + return 1; +} + static const struct pmu *tp_perf_event_init(struct perf_event *event) { return NULL; } -#endif + +static int perf_event_set_filter(struct perf_event *event, void __user *arg) +{ + return -ENOENT; +} + +static void perf_event_free_filter(struct perf_event *event) +{ +} + +#endif /* CONFIG_EVENT_PROFILE */ atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; @@ -4394,7 +4460,7 @@ err_size: goto out; } -int perf_event_set_output(struct perf_event *event, int output_fd) +static int perf_event_set_output(struct perf_event *event, int output_fd) { struct perf_event *output_event = NULL; struct file *output_file = NULL; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ffe53dd..4959ada 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -743,7 +743,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer *buffer, struct ring_buffer_event *event) { - if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) { + if (unlikely(call->filter_active) && + !filter_match_preds(call->filter, rec)) { ring_buffer_discard_commit(buffer, event); return 1; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 273845f..e27bb6a 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "trace.h" #include "trace_output.h" @@ -363,9 +364,8 @@ static void filter_build_regex(struct filter_pred *pred) } /* return 1 if event matches, 0 otherwise (discard) */ -int filter_match_preds(struct ftrace_event_call *call, void *rec) +int filter_match_preds(struct event_filter *filter, void *rec) { - struct event_filter *filter = call->filter; int match, top = 0, val1 = 0, val2 = 0; int stack[MAX_FILTER_PRED]; struct filter_pred *pred; @@ -538,9 +538,8 @@ static void filter_disable_preds(struct ftrace_event_call *call) filter->preds[i]->fn = filter_pred_none; } -void destroy_preds(struct ftrace_event_call *call) +static void __free_preds(struct event_filter *filter) { - struct event_filter *filter = call->filter; int i; if (!filter) @@ -553,21 +552,24 @@ void destroy_preds(struct ftrace_event_call *call) kfree(filter->preds); kfree(filter->filter_string); kfree(filter); +} + +void destroy_preds(struct ftrace_event_call *call) +{ + __free_preds(call->filter); call->filter = NULL; + call->filter_active = 0; } -static int init_preds(struct ftrace_event_call *call) +static struct event_filter *__alloc_preds(void) { struct event_filter *filter; struct filter_pred *pred; int i; - if (call->filter) - return 0; - - filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL); - if (!call->filter) - return -ENOMEM; + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return ERR_PTR(-ENOMEM); filter->n_preds = 0; @@ -583,12 +585,24 @@ static int init_preds(struct ftrace_event_call *call) filter->preds[i] = pred; } - return 0; + return filter; oom: - destroy_preds(call); + __free_preds(filter); + return ERR_PTR(-ENOMEM); +} + +static int init_preds(struct ftrace_event_call *call) +{ + if (call->filter) + return 0; + + call->filter_active = 0; + call->filter = __alloc_preds(); + if (IS_ERR(call->filter)) + return PTR_ERR(call->filter); - return -ENOMEM; + return 0; } static int init_subsystem_preds(struct event_subsystem *system) @@ -629,10 +643,10 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) static int filter_add_pred_fn(struct filter_parse_state *ps, struct ftrace_event_call *call, + struct event_filter *filter, struct filter_pred *pred, filter_pred_fn_t fn) { - struct event_filter *filter = call->filter; int idx, err; if (filter->n_preds == MAX_FILTER_PRED) { @@ -647,7 +661,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, return err; filter->n_preds++; - call->filter_active = 1; return 0; } @@ -726,6 +739,7 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, static int filter_add_pred(struct filter_parse_state *ps, struct ftrace_event_call *call, + struct event_filter *filter, struct filter_pred *pred, bool dry_run) { @@ -795,7 +809,7 @@ static int filter_add_pred(struct filter_parse_state *ps, add_pred_fn: if (!dry_run) - return filter_add_pred_fn(ps, call, pred, fn); + return filter_add_pred_fn(ps, call, filter, pred, fn); return 0; } @@ -1154,6 +1168,7 @@ static int check_preds(struct filter_parse_state *ps) } static int replace_preds(struct ftrace_event_call *call, + struct event_filter *filter, struct filter_parse_state *ps, char *filter_string, bool dry_run) @@ -1200,7 +1215,7 @@ static int replace_preds(struct ftrace_event_call *call, add_pred: if (!pred) return -ENOMEM; - err = filter_add_pred(ps, call, pred, dry_run); + err = filter_add_pred(ps, call, filter, pred, dry_run); filter_free_pred(pred); if (err) return err; @@ -1216,6 +1231,7 @@ static int replace_system_preds(struct event_subsystem *system, char *filter_string) { struct ftrace_event_call *call; + struct event_filter *filter; int err; bool fail = true; @@ -1228,17 +1244,19 @@ static int replace_system_preds(struct event_subsystem *system, continue; /* try to see if the filter can be applied */ - err = replace_preds(call, ps, filter_string, true); + err = replace_preds(call, filter, ps, filter_string, true); if (err) continue; /* really apply the filter */ filter_disable_preds(call); - err = replace_preds(call, ps, filter_string, false); + err = replace_preds(call, filter, ps, filter_string, false); if (err) filter_disable_preds(call); - else - replace_filter_string(call->filter, filter_string); + else { + call->filter_active = 1; + replace_filter_string(filter, filter_string); + } fail = false; } @@ -1252,7 +1270,6 @@ static int replace_system_preds(struct event_subsystem *system, int apply_event_filter(struct ftrace_event_call *call, char *filter_string) { int err; - struct filter_parse_state *ps; mutex_lock(&event_mutex); @@ -1283,10 +1300,11 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out; } - err = replace_preds(call, ps, filter_string, false); + err = replace_preds(call, call->filter, ps, filter_string, false); if (err) append_filter_err(ps, call->filter); - + else + call->filter_active = 1; out: filter_opstack_clear(ps); postfix_clear(ps); @@ -1301,7 +1319,6 @@ int apply_subsystem_event_filter(struct event_subsystem *system, char *filter_string) { int err; - struct filter_parse_state *ps; mutex_lock(&event_mutex); @@ -1345,3 +1362,67 @@ out_unlock: return err; } +#ifdef CONFIG_EVENT_PROFILE + +void ftrace_profile_free_filter(struct perf_event *event) +{ + struct event_filter *filter = event->filter; + + event->filter = NULL; + __free_preds(filter); +} + +int ftrace_profile_set_filter(struct perf_event *event, int event_id, + char *filter_str) +{ + int err; + struct event_filter *filter; + struct filter_parse_state *ps; + struct ftrace_event_call *call = NULL; + + mutex_lock(&event_mutex); + + list_for_each_entry(call, &ftrace_events, list) { + if (call->id == event_id) + break; + } + if (!call) + return -EINVAL; + + if (event->filter) + return -EEXIST; + + filter = __alloc_preds(); + if (IS_ERR(filter)) + return PTR_ERR(filter); + + err = -ENOMEM; + ps = kzalloc(sizeof(*ps), GFP_KERNEL); + if (!ps) + goto free_preds; + + parse_init(ps, filter_ops, filter_str); + err = filter_parse(ps); + if (err) + goto free_ps; + + err = replace_preds(call, filter, ps, filter_str, false); + if (!err) + event->filter = filter; + +free_ps: + filter_opstack_clear(ps); + postfix_clear(ps); + kfree(ps); + +free_preds: + if (err) + __free_preds(filter); + + mutex_unlock(&event_mutex); + + return err; +} + +#endif /* CONFIG_EVENT_PROFILE */ + -- cgit v0.10.2 From c171b552a7d316c7e1c3ad6f70a30178dd53e14c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Oct 2009 11:22:07 +0800 Subject: perf trace: Add filter Suppport Add a new option "--filter " to perf record, and it should be right after "-e trace_point": #./perf record -R -f -e irq:irq_handler_entry --filter irq==18 ^C # ./perf trace perf-4303 ... irq_handler_entry: irq=18 handler=eth0 init-0 ... irq_handler_entry: irq=18 handler=eth0 init-0 ... irq_handler_entry: irq=18 handler=eth0 init-0 ... irq_handler_entry: irq=18 handler=eth0 init-0 ... irq_handler_entry: irq=18 handler=eth0 See Documentation/trace/events.txt for the syntax of filter expressions. Signed-off-by: Li Zefan Acked-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <4AD6955F.90602@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4e3a374..8b2c860 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -374,9 +374,11 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n static void create_counter(int counter, int cpu, pid_t pid) { + char *filter = filters[counter]; struct perf_event_attr *attr = attrs + counter; struct perf_header_attr *h_attr; int track = !counter; /* only the first counter needs these */ + int ret; struct { u64 count; u64 time_enabled; @@ -479,7 +481,6 @@ try_again: multiplex_fd = fd[nr_cpu][counter]; if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { - int ret; ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); assert(ret != -1); @@ -499,6 +500,16 @@ try_again: } } + if (filter != NULL) { + ret = ioctl(fd[nr_cpu][counter], + PERF_EVENT_IOC_SET_FILTER, filter); + if (ret) { + error("failed to set filter with %d (%s)\n", errno, + strerror(errno)); + exit(-1); + } + } + ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); } @@ -676,6 +687,8 @@ static const struct option options[] = { OPT_CALLBACK('e', "event", NULL, "event", "event selector. use 'perf list' to list available events", parse_events), + OPT_CALLBACK(0, "filter", NULL, "filter", + "event filter", parse_filter), OPT_INTEGER('p', "pid", &target_pid, "record events on existing pid"), OPT_INTEGER('r', "realtime", &realtime_prio, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8cfb48c..b097570 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -8,9 +8,10 @@ #include "cache.h" #include "header.h" -int nr_counters; +int nr_counters; struct perf_event_attr attrs[MAX_COUNTERS]; +char *filters[MAX_COUNTERS]; struct event_symbol { u8 type; @@ -708,7 +709,6 @@ static void store_event_type(const char *orgname) perf_header__push_event(id, orgname); } - int parse_events(const struct option *opt __used, const char *str, int unset __used) { struct perf_event_attr attr; @@ -745,6 +745,28 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u return 0; } +int parse_filter(const struct option *opt __used, const char *str, + int unset __used) +{ + int i = nr_counters - 1; + int len = strlen(str); + + if (i < 0 || attrs[i].type != PERF_TYPE_TRACEPOINT) { + fprintf(stderr, + "-F option should follow a -e tracepoint option\n"); + return -1; + } + + filters[i] = malloc(len + 1); + if (!filters[i]) { + fprintf(stderr, "not enough memory to hold filter string\n"); + return -1; + } + strcpy(filters[i], str); + + return 0; +} + static const char * const event_type_descriptors[] = { "", "Hardware event", diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 8626a43..b8c1f64 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -17,11 +17,13 @@ extern struct tracepoint_path *tracepoint_id_to_path(u64 config); extern int nr_counters; extern struct perf_event_attr attrs[MAX_COUNTERS]; +extern char *filters[MAX_COUNTERS]; extern const char *event_name(int ctr); extern const char *__event_name(int type, u64 config); extern int parse_events(const struct option *opt, const char *str, int unset); +extern int parse_filter(const struct option *opt, const char *str, int unset); #define EVENTS_HELP_MAX (128*1024) -- cgit v0.10.2 From a66abe7fbf7805a1a02f241bd5283265ff6706ec Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 12:24:04 +0200 Subject: tracing/events: Fix locking imbalance in the filter code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Américo Wang noticed that we have a locking imbalance in the error paths of ftrace_profile_set_filter(), causing potential leakage of event_mutex. Also clean up other error codepaths related to event_mutex while at it. Plus fix an initialized variable in the subsystem filter code. Reported-by: Américo Wang Cc: Li Zefan Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Tom Zanussi LKML-Reference: <2375c9f90910150247u5ccb8e2at58c764e385ffa490@mail.gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e27bb6a..21d3475 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1230,10 +1230,10 @@ static int replace_system_preds(struct event_subsystem *system, struct filter_parse_state *ps, char *filter_string) { + struct event_filter *filter = system->filter; struct ftrace_event_call *call; - struct event_filter *filter; - int err; bool fail = true; + int err; list_for_each_entry(call, &ftrace_events, list) { @@ -1262,7 +1262,7 @@ static int replace_system_preds(struct event_subsystem *system, if (fail) { parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); - return err; + return -EINVAL; } return 0; } @@ -1281,8 +1281,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) if (!strcmp(strstrip(filter_string), "0")) { filter_disable_preds(call); remove_filter_string(call->filter); - mutex_unlock(&event_mutex); - return 0; + goto out_unlock; } err = -ENOMEM; @@ -1330,8 +1329,7 @@ int apply_subsystem_event_filter(struct event_subsystem *system, if (!strcmp(strstrip(filter_string), "0")) { filter_free_subsystem_preds(system); remove_filter_string(system->filter); - mutex_unlock(&event_mutex); - return 0; + goto out_unlock; } err = -ENOMEM; @@ -1386,15 +1384,20 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id, if (call->id == event_id) break; } + + err = -EINVAL; if (!call) - return -EINVAL; + goto out_unlock; + err = -EEXIST; if (event->filter) - return -EEXIST; + goto out_unlock; filter = __alloc_preds(); - if (IS_ERR(filter)) - return PTR_ERR(filter); + if (IS_ERR(filter)) { + err = PTR_ERR(filter); + goto out_unlock; + } err = -ENOMEM; ps = kzalloc(sizeof(*ps), GFP_KERNEL); @@ -1419,6 +1422,7 @@ free_preds: if (err) __free_preds(filter); +out_unlock: mutex_unlock(&event_mutex); return err; -- cgit v0.10.2 From 434a83c3fbb951908a3a52040f7f0e0b8ba00dd0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 15 Oct 2009 11:50:39 +0200 Subject: events: Harmonize event field names and print output names Now that we can filter based on fields via perf record, people will start using filter expressions and will expect them to be obvious. The primary way to see which fields are available is by looking at the trace output, such as: gcc-18676 [000] 343.011728: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.012727: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.032692: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.033690: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.034687: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.035686: irq_handler_entry: irq=0 handler=timer cc1-18677 [000] 343.036684: irq_handler_entry: irq=0 handler=timer While 'irq==0' filters work, the 'handler==' filter expression does not work: $ perf record -R -f -a -e irq:irq_handler_entry --filter handler=timer sleep 1 Error: failed to set filter with 22 (Invalid argument) The problem is that while an 'irq' field exists and is recognized as a filter field - 'handler' does not exist - its name is 'name' in the output. To solve this, we need to synchronize the printout and the field names, wherever possible. In cases where the printout prints a non-field, we enclose that information in square brackets, such as: perf-1380 [013] 724.903505: softirq_exit: vec=9 [action=RCU] perf-1380 [013] 724.904482: softirq_exit: vec=1 [action=TIMER] This way users can use filter expressions more intuitively: all fields that show up as 'primary' (non-bracketed) information is filterable. This patch harmonizes the field names for all irq, bkl, power, sched and timer events. We might in fact think about dropping the print format bit of generic tracepoints altogether, and just print the fields that are being recorded. Cc: Li Zefan Cc: Tom Zanussi Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h index 8abd620..1af72dc 100644 --- a/include/trace/events/bkl.h +++ b/include/trace/events/bkl.h @@ -13,7 +13,7 @@ TRACE_EVENT(lock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field( int, lock_depth ) + __field( int, depth ) __field_ext( const char *, func, FILTER_PTR_STRING ) __field_ext( const char *, file, FILTER_PTR_STRING ) __field( int, line ) @@ -21,13 +21,13 @@ TRACE_EVENT(lock_kernel, TP_fast_assign( /* We want to record the lock_depth after lock is acquired */ - __entry->lock_depth = current->lock_depth + 1; + __entry->depth = current->lock_depth + 1; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); @@ -38,20 +38,20 @@ TRACE_EVENT(unlock_kernel, TP_ARGS(func, file, line), TP_STRUCT__entry( - __field(int, lock_depth) - __field(const char *, func) - __field(const char *, file) - __field(int, line) + __field(int, depth ) + __field(const char *, func ) + __field(const char *, file ) + __field(int, line ) ), TP_fast_assign( - __entry->lock_depth = current->lock_depth; + __entry->depth = current->lock_depth; __entry->func = func; __entry->file = file; __entry->line = line; ), - TP_printk("depth: %d, %s:%d %s()", __entry->lock_depth, + TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, __entry->file, __entry->line, __entry->func) ); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index b89f9db..dcfcd44 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -48,7 +48,7 @@ TRACE_EVENT(irq_handler_entry, __assign_str(name, action->name); ), - TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name)) + TP_printk("irq=%d name=%s", __entry->irq, __get_str(name)) ); /** @@ -78,7 +78,7 @@ TRACE_EVENT(irq_handler_exit, __entry->ret = ret; ), - TP_printk("irq=%d return=%s", + TP_printk("irq=%d ret=%s", __entry->irq, __entry->ret ? "handled" : "unhandled") ); @@ -107,7 +107,7 @@ TRACE_EVENT(softirq_entry, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); @@ -136,7 +136,7 @@ TRACE_EVENT(softirq_exit, __entry->vec = (int)(h - vec); ), - TP_printk("softirq=%d action=%s", __entry->vec, + TP_printk("vec=%d [action=%s]", __entry->vec, show_softirq_name(__entry->vec)) ); diff --git a/include/trace/events/power.h b/include/trace/events/power.h index ea6d579..9bb96e5 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,8 +16,6 @@ enum { }; #endif - - TRACE_EVENT(power_start, TP_PROTO(unsigned int type, unsigned int state), diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 4069c43..b50b985 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -26,7 +26,7 @@ TRACE_EVENT(sched_kthread_stop, __entry->pid = t->pid; ), - TP_printk("task %s:%d", __entry->comm, __entry->pid) + TP_printk("comm=%s pid=%d", __entry->comm, __entry->pid) ); /* @@ -46,7 +46,7 @@ TRACE_EVENT(sched_kthread_stop_ret, __entry->ret = ret; ), - TP_printk("ret %d", __entry->ret) + TP_printk("ret=%d", __entry->ret) ); /* @@ -73,7 +73,7 @@ TRACE_EVENT(sched_wait_task, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -94,7 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -102,12 +102,12 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -127,7 +127,7 @@ TRACE_EVENT(sched_wakeup_new, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) - __field( int, cpu ) + __field( int, target_cpu ) ), TP_fast_assign( @@ -135,12 +135,12 @@ TRACE_EVENT(sched_wakeup_new, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; - __entry->cpu = task_cpu(p); + __entry->target_cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d [%03d]", + TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->cpu) + __entry->success, __entry->target_cpu) ); /* @@ -176,7 +176,7 @@ TRACE_EVENT(sched_switch, __entry->next_prio = next->prio; ), - TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]", + TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d", __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, __entry->prev_state ? __print_flags(__entry->prev_state, "|", @@ -211,7 +211,7 @@ TRACE_EVENT(sched_migrate_task, __entry->dest_cpu = dest_cpu; ), - TP_printk("task %s:%d [%d] from: %d to: %d", + TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d", __entry->comm, __entry->pid, __entry->prio, __entry->orig_cpu, __entry->dest_cpu) ); @@ -237,7 +237,7 @@ TRACE_EVENT(sched_process_free, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -262,7 +262,7 @@ TRACE_EVENT(sched_process_exit, __entry->prio = p->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -287,7 +287,7 @@ TRACE_EVENT(sched_process_wait, __entry->prio = current->prio; ), - TP_printk("task %s:%d [%d]", + TP_printk("comm=%s pid=%d prio=%d", __entry->comm, __entry->pid, __entry->prio) ); @@ -314,7 +314,7 @@ TRACE_EVENT(sched_process_fork, __entry->child_pid = child->pid; ), - TP_printk("parent %s:%d child %s:%d", + TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d", __entry->parent_comm, __entry->parent_pid, __entry->child_comm, __entry->child_pid) ); @@ -340,7 +340,7 @@ TRACE_EVENT(sched_signal_send, __entry->sig = sig; ), - TP_printk("sig: %d task %s:%d", + TP_printk("sig=%d comm=%s pid=%d", __entry->sig, __entry->comm, __entry->pid) ); @@ -374,7 +374,7 @@ TRACE_EVENT(sched_stat_wait, __perf_count(delay); ), - TP_printk("task: %s:%d wait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -406,7 +406,7 @@ TRACE_EVENT(sched_stat_runtime, __perf_count(runtime); ), - TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + TP_printk("comm=%s pid=%d runtime=%Lu [ns] vruntime=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->runtime, (unsigned long long)__entry->vruntime) @@ -437,7 +437,7 @@ TRACE_EVENT(sched_stat_sleep, __perf_count(delay); ), - TP_printk("task: %s:%d sleep: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); @@ -467,7 +467,7 @@ TRACE_EVENT(sched_stat_iowait, __perf_count(delay); ), - TP_printk("task: %s:%d iowait: %Lu [ns]", + TP_printk("comm=%s pid=%d delay=%Lu [ns]", __entry->comm, __entry->pid, (unsigned long long)__entry->delay) ); diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 1844c48..e5ce87a 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -26,7 +26,7 @@ TRACE_EVENT(timer_init, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -54,7 +54,7 @@ TRACE_EVENT(timer_start, __entry->now = jiffies; ), - TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + TP_printk("timer=%p function=%pf expires=%lu [timeout=%ld]", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now) ); @@ -81,7 +81,7 @@ TRACE_EVENT(timer_expire_entry, __entry->now = jiffies; ), - TP_printk("timer %p: now %lu", __entry->timer, __entry->now) + TP_printk("timer=%p now=%lu", __entry->timer, __entry->now) ); /** @@ -108,7 +108,7 @@ TRACE_EVENT(timer_expire_exit, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -129,7 +129,7 @@ TRACE_EVENT(timer_cancel, __entry->timer = timer; ), - TP_printk("timer %p", __entry->timer) + TP_printk("timer=%p", __entry->timer) ); /** @@ -140,24 +140,24 @@ TRACE_EVENT(timer_cancel, */ TRACE_EVENT(hrtimer_init, - TP_PROTO(struct hrtimer *timer, clockid_t clockid, + TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), - TP_ARGS(timer, clockid, mode), + TP_ARGS(hrtimer, clockid, mode), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( clockid_t, clockid ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; __entry->clockid = clockid; __entry->mode = mode; ), - TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, __entry->clockid == CLOCK_REALTIME ? "CLOCK_REALTIME" : "CLOCK_MONOTONIC", __entry->mode == HRTIMER_MODE_ABS ? @@ -170,26 +170,26 @@ TRACE_EVENT(hrtimer_init, */ TRACE_EVENT(hrtimer_start, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) ), TP_fast_assign( - __entry->timer = timer; - __entry->function = timer->function; - __entry->expires = hrtimer_get_expires(timer).tv64; - __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + __entry->hrtimer = hrtimer; + __entry->function = hrtimer->function; + __entry->expires = hrtimer_get_expires(hrtimer).tv64; + __entry->softexpires = hrtimer_get_softexpires(hrtimer).tv64; ), - TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", - __entry->timer, __entry->function, + TP_printk("hrtimer=%p function=%pf expires=%llu softexpires=%llu", + __entry->hrtimer, __entry->function, (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->expires }), (unsigned long long)ktime_to_ns((ktime_t) { @@ -206,23 +206,22 @@ TRACE_EVENT(hrtimer_start, */ TRACE_EVENT(hrtimer_expire_entry, - TP_PROTO(struct hrtimer *timer, ktime_t *now), + TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), - TP_ARGS(timer, now), + TP_ARGS(hrtimer, now), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) __field( s64, now ) ), TP_fast_assign( - __entry->timer = timer; - __entry->now = now->tv64; + __entry->hrtimer = hrtimer; + __entry->now = now->tv64; ), - TP_printk("hrtimer %p, now %llu", __entry->timer, - (unsigned long long)ktime_to_ns((ktime_t) { - .tv64 = __entry->now })) + TP_printk("hrtimer=%p now=%llu", __entry->hrtimer, + (unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now })) ); /** @@ -234,40 +233,40 @@ TRACE_EVENT(hrtimer_expire_entry, */ TRACE_EVENT(hrtimer_expire_exit, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** * hrtimer_cancel - called when the hrtimer is canceled - * @timer: pointer to struct hrtimer + * @hrtimer: pointer to struct hrtimer */ TRACE_EVENT(hrtimer_cancel, - TP_PROTO(struct hrtimer *timer), + TP_PROTO(struct hrtimer *hrtimer), - TP_ARGS(timer), + TP_ARGS(hrtimer), TP_STRUCT__entry( - __field( void *, timer ) + __field( void *, hrtimer ) ), TP_fast_assign( - __entry->timer = timer; + __entry->hrtimer = hrtimer; ), - TP_printk("hrtimer %p", __entry->timer) + TP_printk("hrtimer=%p", __entry->hrtimer) ); /** @@ -302,7 +301,7 @@ TRACE_EVENT(itimer_state, __entry->interval_usec = value->it_interval.tv_usec; ), - TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu", __entry->which, __entry->expires, __entry->value_sec, __entry->value_usec, __entry->interval_sec, __entry->interval_usec) @@ -332,7 +331,7 @@ TRACE_EVENT(itimer_expire, __entry->pid = pid_nr(pid); ), - TP_printk("which %d, pid %d, now %lu", __entry->which, + TP_printk("which=%d pid=%d now=%lu", __entry->which, (int) __entry->pid, __entry->now) ); -- cgit v0.10.2 From f88f2b4fdb1e098433ad2b005b6f7353f7268ce1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 15 Oct 2009 19:04:16 +0400 Subject: x86: apic: Allow noop operations to be called almost at any time As only apic noop is used we allow to use almost any operation caller wants (and which of them noop driver supports of course). Initially it was reported by Ingo Molnar that apic noop issue a warning for pkg id (which is actually false positive and should be eliminated). So we save checking (and warning issue) for read/write operations while allow any other ops to be freely used. Also: - fix noop_cpu_to_logical_apicid, it should be 0. - rename noop_default_phys_pkg_id to noop_phys_pkg_id (we use default_ prefix for more general routines in apic subsystem). Reported-by: Ingo Molnar Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki LKML-Reference: <20091015150416.GC5331@lenovo> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 61a5628..dce93d4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -246,6 +246,7 @@ static int modern_apic(void) */ void apic_disable(void) { + pr_info("APIC: switched to apic NOOP\n"); apic = &apic_noop; } diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 0b93ec2..9ab6ffb 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -6,7 +6,7 @@ * * Though in case if apic is disabled (for some reason) we try * to not uglify the caller's code and allow to call (some) apic routines - * like self-ipi, etc... and issue a warning if an operation is not allowed + * like self-ipi, etc... */ #include @@ -30,76 +30,88 @@ #include #include -/* - * some operations should never be reached with - * noop apic if it's not turned off, this mostly - * means the caller forgot to disable apic (or - * check the apic presence) before doing a call - */ -static void warn_apic_enabled(void) +static void noop_init_apic_ldr(void) { } +static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { } +static void noop_send_IPI_allbutself(int vector) { } +static void noop_send_IPI_all(int vector) { } +static void noop_send_IPI_self(int vector) { } +static void noop_apic_wait_icr_idle(void) { } +static void noop_apic_icr_write(u32 low, u32 id) { } + +static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip) { - WARN_ONCE((cpu_has_apic || !disable_apic), - "APIC: Called for NOOP operation with apic enabled\n"); + return -1; } -/* - * To check operations but do not bloat source code - */ -#define NOOP_FUNC(func) func { warn_apic_enabled(); } -#define NOOP_FUNC_RET(func, ret) func { warn_apic_enabled(); return ret; } - -NOOP_FUNC(static void noop_init_apic_ldr(void)) -NOOP_FUNC(static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector)) -NOOP_FUNC(static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)) -NOOP_FUNC(static void noop_send_IPI_allbutself(int vector)) -NOOP_FUNC(static void noop_send_IPI_all(int vector)) -NOOP_FUNC(static void noop_send_IPI_self(int vector)) -NOOP_FUNC_RET(static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip), -1) -NOOP_FUNC(static void noop_apic_write(u32 reg, u32 v)) -NOOP_FUNC(void noop_apic_wait_icr_idle(void)) -NOOP_FUNC_RET(static u32 noop_safe_apic_wait_icr_idle(void), 0) -NOOP_FUNC_RET(static u64 noop_apic_icr_read(void), 0) -NOOP_FUNC(static void noop_apic_icr_write(u32 low, u32 id)) -NOOP_FUNC_RET(static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map), phys_map) -NOOP_FUNC_RET(static int noop_cpu_to_logical_apicid(int cpu), 1) -NOOP_FUNC_RET(static int noop_default_phys_pkg_id(int cpuid_apic, int index_msb), 0) -NOOP_FUNC_RET(static unsigned int noop_get_apic_id(unsigned long x), 0) +static u32 noop_safe_apic_wait_icr_idle(void) +{ + return 0; +} + +static u64 noop_apic_icr_read(void) +{ + return 0; +} + +static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map) +{ + return phys_map; +} + +static int noop_cpu_to_logical_apicid(int cpu) +{ + return 0; +} + +static int noop_phys_pkg_id(int cpuid_apic, int index_msb) +{ + return 0; +} + +static unsigned int noop_get_apic_id(unsigned long x) +{ + return 0; +} static int noop_probe(void) { - /* should not ever be enabled this way */ + /* + * NOOP apic should not ever be + * enabled via probe routine + */ return 0; } static int noop_apic_id_registered(void) { - warn_apic_enabled(); - return physid_isset(read_apic_id(), phys_cpu_present_map); + /* + * if we would be really "pedantic" + * we should pass read_apic_id() here + * but since NOOP suppose APIC ID = 0 + * lets save a few cycles + */ + return physid_isset(0, phys_cpu_present_map); } static const struct cpumask *noop_target_cpus(void) { - warn_apic_enabled(); - /* only BSP here */ return cpumask_of(0); } static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid) { - warn_apic_enabled(); return physid_isset(apicid, bitmap); } static unsigned long noop_check_apicid_present(int bit) { - warn_apic_enabled(); return physid_isset(bit, phys_cpu_present_map); } static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) { - warn_apic_enabled(); if (cpu != 0) pr_warning("APIC: Vector allocated for non-BSP cpu\n"); cpumask_clear(retmask); @@ -108,22 +120,21 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) int noop_apicid_to_node(int logical_apicid) { - warn_apic_enabled(); - /* we're always on node 0 */ return 0; } static u32 noop_apic_read(u32 reg) { - /* - * noop-read is always safe until we have - * non-disabled unit - */ WARN_ON_ONCE((cpu_has_apic && !disable_apic)); return 0; } +static void noop_apic_write(u32 reg, u32 v) +{ + WARN_ON_ONCE((cpu_has_apic || !disable_apic)); +} + struct apic apic_noop = { .name = "noop", .probe = noop_probe, @@ -157,7 +168,7 @@ struct apic apic_noop = { .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, - .phys_pkg_id = noop_default_phys_pkg_id, + .phys_pkg_id = noop_phys_pkg_id, .mps_oem_check = NULL, -- cgit v0.10.2 From 5e09954a9acc3b435ffe318b95afd3c02fae069f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Oct 2009 12:31:32 +0200 Subject: x86, mce: Fix up MCE naming nomenclature Prefix global/setup routines with "mcheck_" thus differentiating from the internal facilities prefixed with "mce_". Also, prefix the per cpu calls with mcheck_cpu and rename them to reflect the MCE setup hierarchy of calls better. There should be no functionality change resulting from this patch. Signed-off-by: Borislav Petkov Cc: Andi Kleen LKML-Reference: <1255689093-26921-1-git-send-email-borislav.petkov@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 227a72d..161485d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -120,9 +120,9 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE -void mcheck_init(struct cpuinfo_x86 *c); +void mcheck_cpu_init(struct cpuinfo_x86 *c); #else -static inline void mcheck_init(struct cpuinfo_x86 *c) {} +static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif #ifdef CONFIG_X86_ANCIENT_MCE diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b..4df69a3 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -839,7 +839,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ - mcheck_init(c); + mcheck_cpu_init(c); #endif select_idle_routine(c); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0fb9dc5..68d968e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1136,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ static DEFINE_PER_CPU(struct timer_list, mce_timer); -static void mcheck_timer(unsigned long data) +static void mce_start_timer(unsigned long data) { struct timer_list *t = &per_cpu(mce_timer, data); int *n; @@ -1220,7 +1220,7 @@ static int mce_banks_init(void) /* * Initialize Machine Checks for a CPU. */ -static int __cpuinit mce_cap_init(void) +static int __cpuinit __mcheck_cpu_cap_init(void) { unsigned b; u64 cap; @@ -1258,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) return 0; } -static void mce_init(void) +static void __mcheck_cpu_init_generic(void) { mce_banks_t all_banks; u64 cap; @@ -1287,7 +1287,7 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) +static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) { if (c->x86_vendor == X86_VENDOR_UNKNOWN) { pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); @@ -1355,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) return 0; } -static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) +static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) { if (c->x86 != 5) return; @@ -1369,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) } } -static void mce_cpu_features(struct cpuinfo_x86 *c) +static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { case X86_VENDOR_INTEL: @@ -1383,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) } } -static void mce_init_timer(void) +static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); @@ -1394,7 +1394,7 @@ static void mce_init_timer(void) *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mcheck_timer, smp_processor_id()); + setup_timer(t, mce_start_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } @@ -1414,26 +1414,26 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = * Called for each booted CPU to set up machine checks. * Must be called with preempt off: */ -void __cpuinit mcheck_init(struct cpuinfo_x86 *c) +void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) { if (mce_disabled) return; - mce_ancient_init(c); + __mcheck_cpu_ancient_init(c); if (!mce_available(c)) return; - if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { + if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { mce_disabled = 1; return; } machine_check_vector = do_machine_check; - mce_init(); - mce_cpu_features(c); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(c); + __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); if (raw_smp_processor_id() == 0) @@ -1665,7 +1665,7 @@ __setup("mce", mcheck_enable); * Disable machine checks on suspend and shutdown. We can't really handle * them later. */ -static int mce_disable(void) +static int mce_disable_error_reporting(void) { int i; @@ -1680,12 +1680,12 @@ static int mce_disable(void) static int mce_suspend(struct sys_device *dev, pm_message_t state) { - return mce_disable(); + return mce_disable_error_reporting(); } static int mce_shutdown(struct sys_device *dev) { - return mce_disable(); + return mce_disable_error_reporting(); } /* @@ -1695,8 +1695,8 @@ static int mce_shutdown(struct sys_device *dev) */ static int mce_resume(struct sys_device *dev) { - mce_init(); - mce_cpu_features(¤t_cpu_data); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_vendor(¤t_cpu_data); return 0; } @@ -1706,8 +1706,8 @@ static void mce_cpu_restart(void *data) del_timer_sync(&__get_cpu_var(mce_timer)); if (!mce_available(¤t_cpu_data)) return; - mce_init(); - mce_init_timer(); + __mcheck_cpu_init_generic(); + __mcheck_cpu_init_timer(); } /* Reinit MCEs after user configuration changes */ @@ -1733,7 +1733,7 @@ static void mce_enable_ce(void *all) cmci_reenable(); cmci_recheck(); if (all) - mce_init_timer(); + __mcheck_cpu_init_timer(); } static struct sysdev_class mce_sysclass = { @@ -2042,7 +2042,7 @@ static __init void mce_init_banks(void) } } -static __init int mce_init_device(void) +static __init int mcheck_init_device(void) { int err; int i = 0; @@ -2070,7 +2070,7 @@ static __init int mce_init_device(void) return err; } -device_initcall(mce_init_device); +device_initcall(mcheck_init_device); /* * Old style boot options parsing. Only for compatibility. @@ -2118,7 +2118,7 @@ static int fake_panic_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set, "%llu\n"); -static int __init mce_debugfs_init(void) +static int __init mcheck_debugfs_init(void) { struct dentry *dmce, *ffake_panic; @@ -2132,5 +2132,5 @@ static int __init mce_debugfs_init(void) return 0; } -late_initcall(mce_debugfs_init); +late_initcall(mcheck_debugfs_init); #endif -- cgit v0.10.2 From b33a6363649f0ff83ec81597ea7fe7e688f973cb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 16 Oct 2009 12:31:33 +0200 Subject: x86, mce: Add a global MCE init helper Add an early initcall (pre SMP) which sets up global MCE functionality. Signed-off-by: Borislav Petkov Cc: Andi Kleen LKML-Reference: <1255689093-26921-2-git-send-email-borislav.petkov@amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 68d968e..8080170 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1436,8 +1436,6 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) __mcheck_cpu_init_timer(); INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); - if (raw_smp_processor_id() == 0) - atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); } /* @@ -1657,6 +1655,14 @@ static int __init mcheck_enable(char *str) } __setup("mce", mcheck_enable); +static int __init mcheck_init(void) +{ + atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); + + return 0; +} +early_initcall(mcheck_init); + /* * Sysfs support */ -- cgit v0.10.2 From f39cdf25bf77219676ec5360980ac40b1a7e144a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 17 Oct 2009 08:43:17 +0200 Subject: perf tools: Move dereference after NULL test In each case, if the NULL test on thread is needed, then the dereference should be after the NULL test. A simplified version of the semantic match that detects this problem is as follows (http://coccinelle.lip6.fr/): // @match exists@ expression x, E; identifier fld; @@ * x->fld ... when != \(x = E\|&x\) * x == NULL // Signed-off-by: Julia Lawall LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 3fe0de0..56ba716 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -104,14 +104,14 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) event->ip.pid, (void *)(long)ip); - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - if (thread == NULL) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; } + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + if (event->header.misc & PERF_RECORD_MISC_KERNEL) { level = 'k'; sym = kernel_maps__find_symbol(ip, &map); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 015c797..a4f8cc2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -629,14 +629,14 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } } - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - if (thread == NULL) { eprintf("problem processing %d event, skipping it.\n", event->header.type); return -1; } + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + if (comm_list && !strlist__has_entry(comm_list, thread->comm)) return 0; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index c9c6856..57ad3f4 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1667,14 +1667,14 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - if (thread == NULL) { eprintf("problem processing %d event, skipping it.\n", event->header.type); return -1; } + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + if (profile_cpu != -1 && profile_cpu != (int) cpu) return 0; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ce8459a..4c129ff 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -80,14 +80,14 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - if (thread == NULL) { eprintf("problem processing %d event, skipping it.\n", event->header.type); return -1; } + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + if (sample_type & PERF_SAMPLE_RAW) { struct { u32 size; -- cgit v0.10.2 From f397af06e4c9bf5a0bc92facb8cb29905e338ab0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:20 -0400 Subject: tracing/kprobes: Update kprobe-tracer selftest against new syntax Update kprobe-tracer selftest since command syntax has been changed. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000720.16556.26343.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 739f70e..cdacdab 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1438,12 +1438,12 @@ static __init int kprobe_trace_self_tests_init(void) pr_info("Testing kprobe tracing: "); ret = command_trace_probe("p:testprobe kprobe_trace_selftest_target " - "a1 a2 a3 a4 a5 a6"); + "$arg1 $arg2 $arg3 $arg4 $stack $stack0"); if (WARN_ON_ONCE(ret)) pr_warning("error enabling function entry\n"); ret = command_trace_probe("r:testprobe2 kprobe_trace_selftest_target " - "ra rv"); + "$retval"); if (WARN_ON_ONCE(ret)) pr_warning("error enabling function return\n"); -- cgit v0.10.2 From e63cc2397ecc0f2b604f22fb9cdbb05911c1e5d4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:28 -0400 Subject: tracing/kprobes: Add failure messages for debugging Add verbose failure messages to kprobe-tracer for debugging. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000728.16556.16713.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index cdacdab..b8ef707 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -597,15 +597,19 @@ static int create_trace_probe(int argc, char **argv) void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; - if (argc < 2) + if (argc < 2) { + pr_info("Probe point is not specified.\n"); return -EINVAL; + } if (argv[0][0] == 'p') is_return = 0; else if (argv[0][0] == 'r') is_return = 1; - else + else { + pr_info("Probe definition must be started with 'p' or 'r'.\n"); return -EINVAL; + } if (argv[0][1] == ':') { event = &argv[0][2]; @@ -625,21 +629,29 @@ static int create_trace_probe(int argc, char **argv) } if (isdigit(argv[1][0])) { - if (is_return) + if (is_return) { + pr_info("Return probe point must be a symbol.\n"); return -EINVAL; + } /* an address specified */ ret = strict_strtoul(&argv[0][2], 0, (unsigned long *)&addr); - if (ret) + if (ret) { + pr_info("Failed to parse address.\n"); return ret; + } } else { /* a symbol specified */ symbol = argv[1]; /* TODO: support .init module functions */ ret = split_symbol_offset(symbol, &offset); - if (ret) + if (ret) { + pr_info("Failed to parse symbol.\n"); return ret; - if (offset && is_return) + } + if (offset && is_return) { + pr_info("Return probe must be used without offset.\n"); return -EINVAL; + } } argc -= 2; argv += 2; @@ -658,8 +670,11 @@ static int create_trace_probe(int argc, char **argv) } tp = alloc_trace_probe(group, event, addr, symbol, offset, argc, is_return); - if (IS_ERR(tp)) + if (IS_ERR(tp)) { + pr_info("Failed to allocate trace_probe.(%d)\n", + (int)PTR_ERR(tp)); return PTR_ERR(tp); + } /* parse arguments */ ret = 0; @@ -672,6 +687,8 @@ static int create_trace_probe(int argc, char **argv) arg = argv[i]; if (conflict_field_name(argv[i], tp->args, i)) { + pr_info("Argument%d name '%s' conflicts with " + "another field.\n", i, argv[i]); ret = -EINVAL; goto error; } @@ -685,8 +702,10 @@ static int create_trace_probe(int argc, char **argv) goto error; } ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); - if (ret) + if (ret) { + pr_info("Parse error at argument%d. (%d)\n", i, ret); goto error; + } } tp->nr_args = i; -- cgit v0.10.2 From 8c95bc3e206cff7a55edd2fc5f0e2b305d57903f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:36 -0400 Subject: x86: Add MMX/SSE opcode groups to opcode map Add missing MMX/SSE opcode groups to x86 opcode map. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000736.16556.29061.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 78a0daf..e7285d8 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -777,12 +777,22 @@ GrpTable: Grp11 EndTable GrpTable: Grp12 +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B) EndTable GrpTable: Grp13 +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B) EndTable GrpTable: Grp14 +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B) +3: psrldq Udq,Ib (66),(11B) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B) +7: pslldq Udq,Ib (66),(11B) EndTable GrpTable: Grp15 -- cgit v0.10.2 From d1baf5a5a6088e2991b7dbbd370ff200bd6615ce Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:44 -0400 Subject: x86: Add AMD prefetch and 3DNow! opcodes to opcode map Add AMD prefetch and 3DNow! opcode including FEMMS. Since 3DNow! uses the last immediate byte as an opcode extension byte, x86 insn just treats the extenstion byte as an immediate byte instead of a part of opcode (insn_get_opcode() decodes first "0x0f 0x0f" bytes.) Users who are interested in analyzing 3DNow! opcode still can decode it by analyzing the immediate byte. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000744.16556.27881.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index e7285d8..894497f 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -306,9 +306,10 @@ Referrer: 2-byte escape 0a: 0b: UD2 (1B) 0c: -0d: NOP Ev -0e: -0f: +0d: NOP Ev | GrpP +0e: FEMMS +# 3DNow! uses the last imm byte as opcode extension. +0f: 3DNow! Pq,Qq,Ib # 0x0f 0x10-0x1f 10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) 11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) @@ -813,6 +814,12 @@ GrpTable: Grp16 3: prefetch T2 EndTable +# AMD's Prefetch Group +GrpTable: GrpP +0: PREFETCH +1: PREFETCHW +EndTable + GrpTable: GrpPDLK 0: MONTMUL 1: XSHA1 -- cgit v0.10.2 From 4c20194c2de151bca14224ae384b47abf7636a95 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:07:52 -0400 Subject: perf: Check libdwarf APIs for perf probe Check libdwarf APIs for perf probe in tools/perf/Makefile. Since dwarf_get_ranges() has been added from libdwarf 20081231 (and it's the newest function used in probe-finder.c), this just checks whether the function is defined. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000752.16556.92051.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 52b1f43..03c27b9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -420,8 +420,8 @@ ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel); endif -ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) - msg := $(warning No libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel); +ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) + msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); BASIC_CFLAGS += -DNO_LIBDWARF else EXTLIBS += -lelf -ldwarf -- cgit v0.10.2 From 074fc0e4b3f5d24306c2995f2f3b0bd4759e8aeb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:08:01 -0400 Subject: perf: Use die() for error cases in perf-probe Use die() for exiting perf-probe with errors. This replaces perror_exit(), msg_exit() and fprintf()+exit() with die(), and uses die() in semantic_error(). This also renames 'die' local variables to 'dw_die' for avoiding name confliction. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000801.16556.46866.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 73c883b..a1467d1 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -49,6 +49,7 @@ const char *default_search_path[NR_SEARCH_PATH] = { #define MAX_PATH_LEN 256 #define MAX_PROBES 128 +#define MAX_PROBE_ARGS 128 /* Session management structure */ static struct { @@ -60,19 +61,7 @@ static struct { char *events[MAX_PROBES]; } session; -static void semantic_error(const char *msg) -{ - fprintf(stderr, "Semantic error: %s\n", msg); - exit(1); -} - -static void perror_exit(const char *msg) -{ - perror(msg); - exit(1); -} - -#define MAX_PROBE_ARGS 128 +#define semantic_error(msg ...) die("Semantic error :" msg) static int parse_probepoint(const struct option *opt __used, const char *str, int unset __used) @@ -109,7 +98,7 @@ static int parse_probepoint(const struct option *opt __used, /* Duplicate the argument */ argv[argc] = strndup(s, str - s); if (argv[argc] == NULL) - perror_exit("strndup"); + die("strndup"); if (++argc == MAX_PROBE_ARGS) semantic_error("Too many arguments"); debug("argv[%d]=%s\n", argc, argv[argc - 1]); @@ -171,7 +160,7 @@ static int parse_probepoint(const struct option *opt __used, if (pp->nr_args > 0) { pp->args = (char **)malloc(sizeof(char *) * pp->nr_args); if (!pp->args) - perror_exit("malloc"); + die("malloc"); memcpy(pp->args, &argv[2], sizeof(char *) * pp->nr_args); } @@ -260,7 +249,7 @@ static int write_new_event(int fd, const char *buf) printf("Adding new event: %s\n", buf); ret = write(fd, buf, strlen(buf)); if (ret <= 0) - perror("Error: Failed to create event"); + die("failed to create event."); return ret; } @@ -273,7 +262,7 @@ static int synthesize_probepoint(struct probe_point *pp) int i, len, ret; pp->probes[0] = buf = (char *)calloc(MAX_CMDLEN, sizeof(char)); if (!buf) - perror_exit("calloc"); + die("calloc"); ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); if (ret <= 0 || ret >= MAX_CMDLEN) goto error; @@ -322,10 +311,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ret = synthesize_probepoint(&session.probes[j]); if (ret == -E2BIG) semantic_error("probe point is too long."); - else if (ret < 0) { - perror("snprintf"); - return -1; - } + else if (ret < 0) + die("snprintf"); } #ifndef NO_LIBDWARF @@ -336,10 +323,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) fd = open(session.vmlinux, O_RDONLY); else fd = open_default_vmlinux(); - if (fd < 0) { - perror("vmlinux/module file open"); - return -1; - } + if (fd < 0) + die("vmlinux/module file open"); /* Searching probe points */ for (j = 0; j < session.nr_probe; j++) { @@ -349,10 +334,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) lseek(fd, SEEK_SET, 0); ret = find_probepoint(fd, pp); - if (ret <= 0) { - fprintf(stderr, "Error: No probe point found.\n"); - return -1; - } + if (ret <= 0) + die("No probe point found.\n"); debug("probe event %s found\n", session.events[j]); } close(fd); @@ -363,10 +346,8 @@ setup_probes: /* Settng up probe points */ snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); fd = open(buf, O_WRONLY, O_APPEND); - if (fd < 0) { - perror("kprobe_events open"); - return -1; - } + if (fd < 0) + die("kprobe_events open"); for (j = 0; j < session.nr_probe; j++) { pp = &session.probes[j]; if (pp->found == 1) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index ec6f53f..338fdb9 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -31,6 +31,8 @@ #include #include #include + +#include "util.h" #include "probe-finder.h" @@ -43,20 +45,6 @@ struct die_link { static Dwarf_Debug __dw_debug; static Dwarf_Error __dw_error; -static void msg_exit(int ret, const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - fprintf(stderr, "Error: "); - vfprintf(stderr, fmt, ap); - va_end(ap); - - fprintf(stderr, "\n"); - exit(ret); -} - - /* * Generic dwarf analysis helpers */ @@ -151,11 +139,11 @@ static Dwarf_Unsigned die_get_fileno(Dwarf_Die cu_die, const char *fname) } /* Compare diename and tname */ -static int die_compare_name(Dwarf_Die die, const char *tname) +static int die_compare_name(Dwarf_Die dw_die, const char *tname) { char *name; int ret; - ret = dwarf_diename(die, &name, &__dw_error); + ret = dwarf_diename(dw_die, &name, &__dw_error); ERR_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) { ret = strcmp(tname, name); @@ -187,25 +175,25 @@ static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr, } /* Check the die is inlined function */ -static Dwarf_Bool die_inlined_subprogram(Dwarf_Die die) +static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die) { /* TODO: check strictly */ Dwarf_Bool inl; int ret; - ret = dwarf_hasattr(die, DW_AT_inline, &inl, &__dw_error); + ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error); ERR_IF(ret == DW_DLV_ERROR); return inl; } /* Get the offset of abstruct_origin */ -static Dwarf_Off die_get_abstract_origin(Dwarf_Die die) +static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die) { Dwarf_Attribute attr; Dwarf_Off cu_offs; int ret; - ret = dwarf_attr(die, DW_AT_abstract_origin, &attr, &__dw_error); + ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error); ERR_IF(ret != DW_DLV_OK); ret = dwarf_formref(attr, &cu_offs, &__dw_error); ERR_IF(ret != DW_DLV_OK); @@ -214,7 +202,7 @@ static Dwarf_Off die_get_abstract_origin(Dwarf_Die die) } /* Get entry pc(or low pc, 1st entry of ranges) of the die */ -static Dwarf_Addr die_get_entrypc(Dwarf_Die die) +static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die) { Dwarf_Attribute attr; Dwarf_Addr addr; @@ -224,7 +212,7 @@ static Dwarf_Addr die_get_entrypc(Dwarf_Die die) int ret; /* Try to get entry pc */ - ret = dwarf_attr(die, DW_AT_entry_pc, &attr, &__dw_error); + ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error); ERR_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) { ret = dwarf_formaddr(attr, &addr, &__dw_error); @@ -234,13 +222,13 @@ static Dwarf_Addr die_get_entrypc(Dwarf_Die die) } /* Try to get low pc */ - ret = dwarf_lowpc(die, &addr, &__dw_error); + ret = dwarf_lowpc(dw_die, &addr, &__dw_error); ERR_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) return addr; /* Try to get ranges */ - ret = dwarf_attr(die, DW_AT_ranges, &attr, &__dw_error); + ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error); ERR_IF(ret != DW_DLV_OK); ret = dwarf_formref(attr, &offs, &__dw_error); ERR_IF(ret != DW_DLV_OK); @@ -382,11 +370,11 @@ static void show_location(Dwarf_Loc *loc, struct probe_finder *pf) } else if (op == DW_OP_regx) { regn = loc->lr_number; } else - msg_exit(-EINVAL, "Dwarf_OP %d is not supported.\n", op); + die("Dwarf_OP %d is not supported.\n", op); regs = get_arch_regstr(regn); if (!regs) - msg_exit(-EINVAL, "%lld exceeds max register number.\n", regn); + die("%lld exceeds max register number.\n", regn); if (deref) ret = snprintf(pf->buf, pf->len, @@ -417,8 +405,8 @@ static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf) dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); return ; error: - msg_exit(-1, "Failed to find the location of %s at this address.\n" - " Perhaps, it was optimized out.\n", pf->var); + die("Failed to find the location of %s at this address.\n" + " Perhaps, it has been optimized out.\n", pf->var); } static int variable_callback(struct die_link *dlink, void *data) @@ -456,8 +444,7 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) /* Search child die for local variables and parameters. */ ret = search_die_from_children(sp_die, variable_callback, pf); if (!ret) - msg_exit(-1, "Failed to find '%s' in this function.\n", - pf->var); + die("Failed to find '%s' in this function.\n", pf->var); } /* Get a frame base on the address */ @@ -568,8 +555,7 @@ static void find_by_line(Dwarf_Die cu_die, struct probe_finder *pf) /* Search a real subprogram including this line, */ ret = search_die_from_children(cu_die, probeaddr_callback, pf); if (ret == 0) - msg_exit(-1, - "Probe point is not found in subprograms.\n"); + die("Probe point is not found in subprograms.\n"); /* Continuing, because target line might be inlined. */ } dwarf_srclines_dealloc(__dw_debug, lines, cnt); @@ -621,7 +607,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) !die_inlined_subprogram(lk->die)) goto found; } - msg_exit(-1, "Failed to find real subprogram.\n"); + die("Failed to find real subprogram.\n"); found: /* Get offset from subprogram */ ret = die_within_subprogram(lk->die, pf->addr, &offs); @@ -649,8 +635,7 @@ int find_probepoint(int fd, struct probe_point *pp) ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error); if (ret != DW_DLV_OK) - msg_exit(-1, "Failed to call dwarf_init(). " - "Maybe, not a dwarf file?\n"); + die("Failed to call dwarf_init(). Maybe, not a dwarf file.\n"); pp->found = 0; while (++cu_number) { -- cgit v0.10.2 From 89c69c0eee7515cdc217f4278de43547284b3458 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:08:10 -0400 Subject: perf: Use eprintf() for debug messages in perf-probe Replace debug() macro with eprintf() and add -v option for showing those messages in perf-probe. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000810.16556.38013.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index a1467d1..b5ad86a 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -35,6 +35,8 @@ #include "perf.h" #include "builtin.h" #include "util/util.h" +#include "util/event.h" +#include "util/debug.h" #include "util/parse-options.h" #include "util/parse-events.h" /* For debugfs_path */ #include "util/probe-finder.h" @@ -76,7 +78,7 @@ static int parse_probepoint(const struct option *opt __used, if (!str) /* The end of probe points */ return 0; - debug("Probe-define(%d): %s\n", session.nr_probe, str); + eprintf("probe-definition(%d): %s\n", session.nr_probe, str); if (++session.nr_probe == MAX_PROBES) semantic_error("Too many probes"); @@ -101,7 +103,7 @@ static int parse_probepoint(const struct option *opt __used, die("strndup"); if (++argc == MAX_PROBE_ARGS) semantic_error("Too many arguments"); - debug("argv[%d]=%s\n", argc, argv[argc - 1]); + eprintf("argv[%d]=%s\n", argc, argv[argc - 1]); } } while (*str != '\0'); if (argc < 2) @@ -131,7 +133,7 @@ static int parse_probepoint(const struct option *opt __used, pp->line = atoi(ptr); if (!pp->file || !pp->line) semantic_error("Failed to parse line."); - debug("file:%s line:%d\n", pp->file, pp->line); + eprintf("file:%s line:%d\n", pp->file, pp->line); } else { /* Function name */ ptr = strchr(arg, '+'); @@ -148,7 +150,7 @@ static int parse_probepoint(const struct option *opt __used, pp->file = strdup(ptr); } pp->function = strdup(arg); - debug("symbol:%s file:%s offset:%d\n", + eprintf("symbol:%s file:%s offset:%d\n", pp->function, pp->file, pp->offset); } free(argv[1]); @@ -173,7 +175,7 @@ static int parse_probepoint(const struct option *opt __used, session.need_dwarf = 1; } - debug("%d arguments\n", pp->nr_args); + eprintf("%d arguments\n", pp->nr_args); return 0; } @@ -186,7 +188,7 @@ static int open_default_vmlinux(void) ret = uname(&uts); if (ret) { - debug("uname() failed.\n"); + eprintf("uname() failed.\n"); return -errno; } session.release = uts.release; @@ -194,11 +196,12 @@ static int open_default_vmlinux(void) ret = snprintf(fname, MAX_PATH_LEN, default_search_path[i], session.release); if (ret >= MAX_PATH_LEN || ret < 0) { - debug("Filename(%d,%s) is too long.\n", i, uts.release); + eprintf("Filename(%d,%s) is too long.\n", i, + uts.release); errno = E2BIG; return -E2BIG; } - debug("try to open %s\n", fname); + eprintf("try to open %s\n", fname); fd = open(fname, O_RDONLY); if (fd >= 0) break; @@ -213,6 +216,8 @@ static const char * const probe_usage[] = { }; static const struct option options[] = { + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show parsed arguments, etc)"), #ifndef NO_LIBDWARF OPT_STRING('k', "vmlinux", &session.vmlinux, "file", "vmlinux/module pathname"), @@ -336,7 +341,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ret = find_probepoint(fd, pp); if (ret <= 0) die("No probe point found.\n"); - debug("probe event %s found\n", session.events[j]); + eprintf("probe event %s found\n", session.events[j]); } close(fd); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 338fdb9..db24c91 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -32,6 +32,8 @@ #include #include +#include "event.h" +#include "debug.h" #include "util.h" #include "probe-finder.h" @@ -134,7 +136,7 @@ static Dwarf_Unsigned die_get_fileno(Dwarf_Die cu_die, const char *fname) dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST); } if (found) - debug("found fno: %d\n", (int)found); + eprintf("found fno: %d\n", (int)found); return found; } @@ -440,7 +442,7 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) return ; } - debug("Searching '%s' variable in context.\n", pf->var); + eprintf("Searching '%s' variable in context.\n", pf->var); /* Search child die for local variables and parameters. */ ret = search_die_from_children(sp_die, variable_callback, pf); if (!ret) @@ -550,7 +552,7 @@ static void find_by_line(Dwarf_Die cu_die, struct probe_finder *pf) ret = dwarf_lineaddr(lines[i], &addr, &__dw_error); ERR_IF(ret != DW_DLV_OK); - debug("Probe point found: 0x%llx\n", addr); + eprintf("Probe point found: 0x%llx\n", addr); pf->addr = addr; /* Search a real subprogram including this line, */ ret = search_die_from_children(cu_die, probeaddr_callback, pf); @@ -581,7 +583,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) &pf->inl_offs, &__dw_error); ERR_IF(ret != DW_DLV_OK); - debug("inline definition offset %lld\n", + eprintf("inline definition offset %lld\n", pf->inl_offs); return 0; } @@ -597,7 +599,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) /* Get probe address */ pf->addr = die_get_entrypc(dlink->die); pf->addr += pp->offset; - debug("found inline addr: 0x%llx\n", pf->addr); + eprintf("found inline addr: 0x%llx\n", pf->addr); /* Inlined function. Get a real subprogram */ for (lk = dlink->parent; lk != NULL; lk = lk->parent) { tag = 0; diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 306810c..6a7cb0c 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -4,13 +4,6 @@ #define _stringify(n) #n #define stringify(n) _stringify(n) -#ifdef DEBUG -#define debug(fmt ...) \ - fprintf(stderr, "DBG(" __FILE__ ":" stringify(__LINE__) "): " fmt) -#else -#define debug(fmt ...) do {} while (0) -#endif - #define ERR_IF(cnd) \ do { if (cnd) { \ fprintf(stderr, "Error (" __FILE__ ":" stringify(__LINE__) \ -- cgit v0.10.2 From 9769833b8e4425dc93fc837bf124c6cb02a51abb Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:08:18 -0400 Subject: perf: Add DIE_IF() macro for error checking Add DIE_IF() macro and replace ERR_IF() with it, and use linux/stringify.h. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000818.16556.82452.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 03c27b9..1abbf9a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -321,6 +321,7 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h +LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/list.h LIB_H += perf.h LIB_H += util/types.h diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index db24c91..be997ab 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -146,7 +146,7 @@ static int die_compare_name(Dwarf_Die dw_die, const char *tname) char *name; int ret; ret = dwarf_diename(dw_die, &name, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) { ret = strcmp(tname, name); dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); @@ -164,11 +164,11 @@ static int die_within_subprogram(Dwarf_Die sp_die, Dwarf_Addr addr, /* TODO: check ranges */ ret = dwarf_lowpc(sp_die, &lopc, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_NO_ENTRY) return 0; ret = dwarf_highpc(sp_die, &hipc, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); if (lopc <= addr && addr < hipc) { *offs = addr - lopc; return 1; @@ -184,7 +184,7 @@ static Dwarf_Bool die_inlined_subprogram(Dwarf_Die dw_die) int ret; ret = dwarf_hasattr(dw_die, DW_AT_inline, &inl, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); return inl; } @@ -196,9 +196,9 @@ static Dwarf_Off die_get_abstract_origin(Dwarf_Die dw_die) int ret; ret = dwarf_attr(dw_die, DW_AT_abstract_origin, &attr, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); ret = dwarf_formref(attr, &cu_offs, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); return cu_offs; } @@ -215,28 +215,28 @@ static Dwarf_Addr die_get_entrypc(Dwarf_Die dw_die) /* Try to get entry pc */ ret = dwarf_attr(dw_die, DW_AT_entry_pc, &attr, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) { ret = dwarf_formaddr(attr, &addr, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); return addr; } /* Try to get low pc */ ret = dwarf_lowpc(dw_die, &addr, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) return addr; /* Try to get ranges */ ret = dwarf_attr(dw_die, DW_AT_ranges, &attr, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); ret = dwarf_formref(attr, &offs, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); ret = dwarf_get_ranges(__dw_debug, offs, &ranges, &cnt, NULL, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); addr = ranges[0].dwr_addr1; dwarf_ranges_dealloc(__dw_debug, ranges, cnt); return addr; @@ -261,7 +261,7 @@ static int __search_die_tree(struct die_link *cur_link, while (!(ret = die_cb(cur_link, data))) { /* Check child die */ ret = dwarf_child(cur_link->die, &new_die, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) { new_link.parent = cur_link; new_link.die = new_die; @@ -273,7 +273,7 @@ static int __search_die_tree(struct die_link *cur_link, /* Move to next sibling */ ret = dwarf_siblingof(__dw_debug, cur_link->die, &new_die, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); dwarf_dealloc(__dw_debug, cur_link->die, DW_DLA_DIE); cur_link->die = new_die; if (ret == DW_DLV_NO_ENTRY) @@ -293,7 +293,7 @@ static int search_die_from_children(Dwarf_Die parent_die, new_link.parent = NULL; ret = dwarf_child(parent_die, &new_link.die, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) return __search_die_tree(&new_link, die_cb, data); else @@ -309,7 +309,7 @@ static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc, int ret, i; ret = dwarf_loclist_n(attr, &llbuf, &lcnt, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); ret = DW_DLV_NO_ENTRY; for (i = 0; i < lcnt; ++i) { if (llbuf[i]->ld_lopc <= addr && @@ -317,7 +317,7 @@ static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc, memcpy(desc, llbuf[i], sizeof(Dwarf_Locdesc)); desc->ld_s = malloc(sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); - ERR_IF(desc->ld_s == NULL); + DIE_IF(desc->ld_s == NULL); memcpy(desc->ld_s, llbuf[i]->ld_s, sizeof(Dwarf_Loc) * llbuf[i]->ld_cents); ret = DW_DLV_OK; @@ -383,8 +383,8 @@ static void show_location(Dwarf_Loc *loc, struct probe_finder *pf) " %s=%+lld(%s)", pf->var, offs, regs); else ret = snprintf(pf->buf, pf->len, " %s=%s", pf->var, regs); - ERR_IF(ret < 0); - ERR_IF(ret >= pf->len); + DIE_IF(ret < 0); + DIE_IF(ret >= pf->len); } /* Show a variables in kprobe event format */ @@ -401,7 +401,7 @@ static void show_variable(Dwarf_Die vr_die, struct probe_finder *pf) if (ret != DW_DLV_OK) goto error; /* TODO? */ - ERR_IF(ld.ld_cents != 1); + DIE_IF(ld.ld_cents != 1); show_location(&ld.ld_s[0], pf); free(ld.ld_s); dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); @@ -418,7 +418,7 @@ static int variable_callback(struct die_link *dlink, void *data) int ret; ret = dwarf_tag(dlink->die, &tag, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if ((tag == DW_TAG_formal_parameter || tag == DW_TAG_variable) && (die_compare_name(dlink->die, pf->var) == 0)) { @@ -437,8 +437,8 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) if (!is_c_varname(pf->var)) { /* Output raw parameters */ ret = snprintf(pf->buf, pf->len, " %s", pf->var); - ERR_IF(ret < 0); - ERR_IF(ret >= pf->len); + DIE_IF(ret < 0); + DIE_IF(ret >= pf->len); return ; } @@ -456,9 +456,9 @@ static void get_current_frame_base(Dwarf_Die sp_die, struct probe_finder *pf) int ret; ret = dwarf_attr(sp_die, DW_AT_frame_base, &attr, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); ret = attr_get_locdesc(attr, &pf->fbloc, (pf->addr - pf->cu_base)); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); } @@ -479,7 +479,7 @@ static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs, /* Output name of probe point */ ret = dwarf_diename(sp_die, &name, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_OK) { ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name, (unsigned int)offs); @@ -488,8 +488,8 @@ static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs, /* This function has no name. */ ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr); } - ERR_IF(ret < 0); - ERR_IF(ret >= MAX_PROBE_BUFFER); + DIE_IF(ret < 0); + DIE_IF(ret >= MAX_PROBE_BUFFER); len = ret; /* Find each argument */ @@ -515,7 +515,7 @@ static int probeaddr_callback(struct die_link *dlink, void *data) int ret; ret = dwarf_tag(dlink->die, &tag, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); /* Check the address is in this subprogram */ if (tag == DW_TAG_subprogram && die_within_subprogram(dlink->die, pf->addr, &offs)) { @@ -537,21 +537,21 @@ static void find_by_line(Dwarf_Die cu_die, struct probe_finder *pf) int ret; ret = dwarf_srclines(cu_die, &lines, &cnt, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); for (i = 0; i < cnt; i++) { ret = dwarf_line_srcfileno(lines[i], &fno, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); if (fno != pf->fno) continue; ret = dwarf_lineno(lines[i], &lineno, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); if (lineno != (Dwarf_Unsigned)pp->line) continue; ret = dwarf_lineaddr(lines[i], &addr, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); eprintf("Probe point found: 0x%llx\n", addr); pf->addr = addr; /* Search a real subprogram including this line, */ @@ -574,7 +574,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) int ret; ret = dwarf_tag(dlink->die, &tag, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (tag == DW_TAG_subprogram) { if (die_compare_name(dlink->die, pp->function) == 0) { if (die_inlined_subprogram(dlink->die)) { @@ -582,7 +582,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) ret = dwarf_die_CU_offset(dlink->die, &pf->inl_offs, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); eprintf("inline definition offset %lld\n", pf->inl_offs); return 0; @@ -604,7 +604,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) for (lk = dlink->parent; lk != NULL; lk = lk->parent) { tag = 0; dwarf_tag(lk->die, &tag, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (tag == DW_TAG_subprogram && !die_inlined_subprogram(lk->die)) goto found; @@ -613,7 +613,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) found: /* Get offset from subprogram */ ret = die_within_subprogram(lk->die, pf->addr, &offs); - ERR_IF(!ret); + DIE_IF(!ret); show_probepoint(lk->die, offs, pf); /* Continue to search */ } @@ -644,13 +644,13 @@ int find_probepoint(int fd, struct probe_point *pp) /* Search CU (Compilation Unit) */ ret = dwarf_next_cu_header(__dw_debug, NULL, NULL, NULL, &addr_size, &next_cuh, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_NO_ENTRY) break; /* Get the DIE(Debugging Information Entry) of this CU */ ret = dwarf_siblingof(__dw_debug, 0, &cu_die, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); /* Check if target file is included. */ if (pp->file) @@ -659,7 +659,7 @@ int find_probepoint(int fd, struct probe_point *pp) if (!pp->file || pf.fno) { /* Save CU base address (for frame_base) */ ret = dwarf_lowpc(cu_die, &pf.cu_base, &__dw_error); - ERR_IF(ret == DW_DLV_ERROR); + DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_NO_ENTRY) pf.cu_base = 0; if (pp->line) @@ -670,7 +670,7 @@ int find_probepoint(int fd, struct probe_point *pp) dwarf_dealloc(__dw_debug, cu_die, DW_DLA_DIE); } ret = dwarf_finish(__dw_debug, &__dw_error); - ERR_IF(ret != DW_DLV_OK); + DIE_IF(ret != DW_DLV_OK); return pp->found; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 6a7cb0c..d17fafc 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -1,16 +1,6 @@ #ifndef _PROBE_FINDER_H #define _PROBE_FINDER_H -#define _stringify(n) #n -#define stringify(n) _stringify(n) - -#define ERR_IF(cnd) \ - do { if (cnd) { \ - fprintf(stderr, "Error (" __FILE__ ":" stringify(__LINE__) \ - "): " stringify(cnd) "\n"); \ - exit(1); \ - } } while (0) - #define MAX_PATH_LEN 256 #define MAX_PROBE_BUFFER 1024 #define MAX_PROBES 128 diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9de2329..0daa341 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -134,6 +134,15 @@ extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +#include "../../../include/linux/stringify.h" + +#define DIE_IF(cnd) \ + do { if (cnd) \ + die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \ + __stringify(cnd) "\n"); \ + } while (0) + + extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); extern int prefixcmp(const char *str, const char *prefix); -- cgit v0.10.2 From 595c36490deb49381dc51231a3d5e6b66786ed27 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 16 Oct 2009 20:08:27 -0400 Subject: perf: Add perf-probe document Add perf-probe subcommand document and add it to command-list. Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <20091017000827.16556.73539.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt new file mode 100644 index 0000000..6b6c6ae --- /dev/null +++ b/tools/perf/Documentation/perf-probe.txt @@ -0,0 +1,48 @@ +perf-probe(1) +============= + +NAME +---- +perf-probe - Define new dynamic tracepoints + +SYNOPSIS +-------- +[verse] +'perf probe' [-k ] -P 'PROBE' [-P 'PROBE' ...] + + +DESCRIPTION +----------- +This command defines dynamic tracepoint events, by symbol and registers +without debuginfo, or by C expressions (C line numbers, C function names, +and C local variables) with debuginfo. + + +OPTIONS +------- +-k:: +--vmlinux:: + Specify vmlinux path which has debuginfo (Dwarf binary). + +-v:: +--verbose:: + Be more verbose (show parsed arguments, etc). + +-P:: +--probe:: + Define a probe point (see PROBE SYNTAX for detail) + +PROBE SYNTAX +------------ +Probe points are defined by following syntax. + + "TYPE:[GRP/]NAME FUNC[+OFFS][@SRC]|@SRC:LINE [ARG ...]" + +'TYPE' specifies the type of probe point, you can use either "p" (kprobe) or "r" (kretprobe) for 'TYPE'. 'GRP' specifies the group name of this probe, and 'NAME' specifies the event name. If 'GRP' is omitted, "kprobes" is used for its group name. +'FUNC' and 'OFFS' specifies function and offset (in byte) where probe will be put. In addition, 'SRC' specifies a source file which has that function (this is mainly for inline functions). +You can specify a probe point by the source line number by using '@SRC:LINE' syntax, where 'SRC' is the source file path and 'LINE' is the line number. +'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc). + +SEE ALSO +-------- +linkperf:perf-trace[1], linkperf:perf-record[1] diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 00326e2..6475db4 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -11,3 +11,4 @@ perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common perf-trace mainporcelain common +perf-probe mainporcelain common -- cgit v0.10.2 From 11018201b831e19304c0d639f105ad6c27e120b1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 22:29:23 +1100 Subject: perf stat: Add branch performance metric When we count both branches and branch-misses it is useful to print out the percentage of branch-misses: # perf stat -e branches -e branch-misses /bin/true Performance counter stats for '/bin/true': 401684 branches # 0.000 M/sec 23301 branch-misses # 5.801 % Signed-off-by: Anton Blanchard Cc: paulus@samba.org Cc: a.p.zijlstra@chello.nl LKML-Reference: <20091018112923.GQ4808@kryten> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3db31e7..c373683 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -125,6 +125,7 @@ struct stats event_res_stats[MAX_COUNTERS][3]; struct stats runtime_nsecs_stats; struct stats walltime_nsecs_stats; struct stats runtime_cycles_stats; +struct stats runtime_branches_stats; #define MATCH_EVENT(t, c, counter) \ (attrs[counter].type == PERF_TYPE_##t && \ @@ -235,6 +236,8 @@ static void read_counter(int counter) update_stats(&runtime_nsecs_stats, count[0]); if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter)) update_stats(&runtime_cycles_stats, count[0]); + if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter)) + update_stats(&runtime_branches_stats, count[0]); } static int run_perf_stat(int argc __used, const char **argv) @@ -352,6 +355,14 @@ static void abs_printout(int counter, double avg) ratio = avg / total; fprintf(stderr, " # %10.3f IPC ", ratio); + } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter)) { + total = avg_stats(&runtime_branches_stats); + + if (total) + ratio = avg * 100 / total; + + fprintf(stderr, " # %10.3f %% ", ratio); + } else { total = avg_stats(&runtime_nsecs_stats); -- cgit v0.10.2 From 5a116dd2797677cad48fee2f42267e3cb69f5502 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 17 Oct 2009 17:12:33 +0200 Subject: perf tools: Use kernel bitmap library Use the kernel bitmap library for internal perf tools uses. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt LKML-Reference: <1255792354-11304-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 106c150..2400e50 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -363,6 +363,9 @@ LIB_OBJS += util/parse-options.o LIB_OBJS += util/parse-events.o LIB_OBJS += util/path.o LIB_OBJS += util/rbtree.o +LIB_OBJS += util/bitmap.o +LIB_OBJS += util/hweight.o +LIB_OBJS += util/find_next_bit.o LIB_OBJS += util/run-command.o LIB_OBJS += util/quote.o LIB_OBJS += util/strbuf.o @@ -790,6 +793,19 @@ util/config.o: util/config.c PERF-CFLAGS util/rbtree.o: ../../lib/rbtree.c PERF-CFLAGS $(QUIET_CC)$(CC) -o util/rbtree.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +# some perf warning policies can't fit to lib/bitmap.c, eg: it warns about variable shadowing +# from that comes from kernel headers wrapping. +KBITMAP_FLAGS=`echo $(ALL_CFLAGS) | sed s/-Wshadow// | sed s/-Wswitch-default// | sed s/-Wextra//` + +util/bitmap.o: ../../lib/bitmap.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/bitmap.o -c $(KBITMAP_FLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + +util/hweight.o: ../../lib/hweight.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/hweight.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + +util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< + perf-%$X: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8b2c860..fc3709c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -21,9 +21,6 @@ #include #include -#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) -#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) - static int fd[MAX_NR_CPUS][MAX_COUNTERS]; static long default_interval = 0; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 57ad3f4..807ca66 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -38,8 +38,6 @@ static int cwdlen; #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 -#define BUG_ON(x) assert(!(x)) - static u64 run_measurement_overhead; static u64 sleep_measurement_overhead; diff --git a/tools/perf/util/include/asm/bitops.h b/tools/perf/util/include/asm/bitops.h new file mode 100644 index 0000000..fbe4d92 --- /dev/null +++ b/tools/perf/util/include/asm/bitops.h @@ -0,0 +1,6 @@ +#include "../../../../include/asm-generic/bitops/__fls.h" +#include "../../../../include/asm-generic/bitops/fls.h" +#include "../../../../include/asm-generic/bitops/fls64.h" +#include "../../../../include/asm-generic/bitops/__ffs.h" +#include "../../../../include/asm-generic/bitops/ffz.h" +#include "../../../../include/asm-generic/bitops/hweight.h" diff --git a/tools/perf/util/include/asm/byteorder.h b/tools/perf/util/include/asm/byteorder.h new file mode 100644 index 0000000..39f367cf --- /dev/null +++ b/tools/perf/util/include/asm/byteorder.h @@ -0,0 +1,2 @@ +#include "../asm/types.h" +#include "../../../../include/linux/swab.h" diff --git a/tools/perf/util/include/asm/swab.h b/tools/perf/util/include/asm/swab.h new file mode 100644 index 0000000..ed53894 --- /dev/null +++ b/tools/perf/util/include/asm/swab.h @@ -0,0 +1 @@ +/* stub */ diff --git a/tools/perf/util/include/asm/types.h b/tools/perf/util/include/asm/types.h new file mode 100644 index 0000000..06703c6 --- /dev/null +++ b/tools/perf/util/include/asm/types.h @@ -0,0 +1,17 @@ +#ifndef PERF_ASM_TYPES_H_ +#define PERF_ASM_TYPES_H_ + +#include +#include "../../types.h" +#include + +/* CHECKME: Not sure both always match */ +#define BITS_PER_LONG __WORDSIZE + +typedef u64 __u64; +typedef u32 __u32; +typedef u16 __u16; +typedef u8 __u8; +typedef s64 __s64; + +#endif /* PERF_ASM_TYPES_H_ */ diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h new file mode 100644 index 0000000..d0f72b8 --- /dev/null +++ b/tools/perf/util/include/asm/uaccess.h @@ -0,0 +1,14 @@ +#ifndef _PERF_ASM_UACCESS_H_ +#define _PERF_ASM_UACCESS_H_ + +#define __get_user(src, dest) \ +({ \ + (src) = *dest; \ + 0; \ +}) + +#define get_user __get_user + +#define access_ok(type, addr, size) 1 + +#endif diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h new file mode 100644 index 0000000..821c103 --- /dev/null +++ b/tools/perf/util/include/linux/bitmap.h @@ -0,0 +1,2 @@ +#include "../../../../include/linux/bitmap.h" +#include "../../../../include/asm-generic/bitops/find.h" diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h new file mode 100644 index 0000000..ace57c3 --- /dev/null +++ b/tools/perf/util/include/linux/bitops.h @@ -0,0 +1,27 @@ +#ifndef _PERF_LINUX_BITOPS_H_ +#define _PERF_LINUX_BITOPS_H_ + +#define __KERNEL__ + +#define CONFIG_GENERIC_FIND_NEXT_BIT +#define CONFIG_GENERIC_FIND_FIRST_BIT +#include "../../../../include/linux/bitops.h" + +static inline void set_bit(int nr, unsigned long *addr) +{ + addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); +} + +static __always_inline int test_bit(unsigned int nr, const unsigned long *addr) +{ + return ((1UL << (nr % BITS_PER_LONG)) & + (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; +} + +unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned + long size, unsigned long offset); + +unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned + long size, unsigned long offset); + +#endif diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h new file mode 100644 index 0000000..dfb0713 --- /dev/null +++ b/tools/perf/util/include/linux/compiler.h @@ -0,0 +1,10 @@ +#ifndef _PERF_LINUX_COMPILER_H_ +#define _PERF_LINUX_COMPILER_H_ + +#ifndef __always_inline +#define __always_inline inline +#endif +#define __user +#define __attribute_const__ + +#endif diff --git a/tools/perf/util/include/linux/ctype.h b/tools/perf/util/include/linux/ctype.h new file mode 100644 index 0000000..bae5783 --- /dev/null +++ b/tools/perf/util/include/linux/ctype.h @@ -0,0 +1 @@ +#include "../../../../include/linux/ctype.h" diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index a6b8739..4b9204d 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -1,6 +1,16 @@ #ifndef PERF_LINUX_KERNEL_H_ #define PERF_LINUX_KERNEL_H_ +#include +#include +#include +#include + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) +#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) + #ifndef offsetof #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) #endif @@ -26,4 +36,53 @@ _max1 > _max2 ? _max1 : _max2; }) #endif +#ifndef min +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) +#endif + +#ifndef BUG_ON +#define BUG_ON(cond) assert(!(cond)) +#endif + +/* + * Both need more care to handle endianness + * (Don't use bitmap_copy_le() for now) + */ +#define cpu_to_le64(x) (x) +#define cpu_to_le32(x) (x) + +static inline int +vscnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int i; + ssize_t ssize = size; + + i = vsnprintf(buf, size, fmt, args); + + return (i >= ssize) ? (ssize - 1) : i; +} + +static inline int scnprintf(char * buf, size_t size, const char * fmt, ...) +{ + va_list args; + ssize_t ssize = size; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + + return (i >= ssize) ? (ssize - 1) : i; +} + +static inline unsigned long +simple_strtoul(const char *nptr, char **endptr, int base) +{ + return strtoul(nptr, endptr, base); +} + #endif diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h new file mode 100644 index 0000000..ed53894 --- /dev/null +++ b/tools/perf/util/include/linux/types.h @@ -0,0 +1 @@ +/* stub */ -- cgit v0.10.2 From 2ba0825075e76236d22a20decd8e2346a99faabe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 17 Oct 2009 17:12:34 +0200 Subject: perf tools: Introduce bitmask'ed additional headers This provides a new set of bitmasked headers. A new field is added in the perf headers that implements a bitmap storing optional features present in the perf.data file. The layout can be pictured like this: (Usual perf headers)(Features bitmap)[Feature 0][Feature n][Feature 255] If the bit n is set, then the feature n is used in this file. They are all set in order. This brings a backward and forward compatibility. The trace_info section has moved into such optional features, this is the first and only one for now. This is backward compatible with the .32 file version although it doesn't support the previous separate trace.info file. And finally it doesn't support the current interim development version. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt LKML-Reference: <1255792354-11304-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fc3709c..f0467ff 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -574,11 +574,11 @@ static int __cmd_record(int argc, const char **argv) header = perf_header__new(); if (raw_samples) { - perf_header__set_trace_info(); + perf_header__feat_trace_info(header); } else { for (i = 0; i < nr_counters; i++) { if (attrs[i].sample_type & PERF_SAMPLE_RAW) { - perf_header__set_trace_info(); + perf_header__feat_trace_info(header); break; } } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9aae360..171d51b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -8,6 +8,8 @@ #include "../perf.h" #include "trace-event.h" +#include + /* * Create new perf.data header attribute: */ @@ -48,25 +50,17 @@ void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) */ struct perf_header *perf_header__new(void) { - struct perf_header *self = malloc(sizeof(*self)); + struct perf_header *self = calloc(sizeof(*self), 1); if (!self) die("nomem"); - self->frozen = 0; - - self->attrs = 0; self->size = 1; self->attr = malloc(sizeof(void *)); if (!self->attr) die("nomem"); - self->data_offset = 0; - self->data_size = 0; - self->trace_info_offset = 0; - self->trace_info_size = 0; - return self; } @@ -149,14 +143,12 @@ struct perf_file_header { struct perf_file_section attrs; struct perf_file_section data; struct perf_file_section event_types; - struct perf_file_section trace_info; + feat_mask_t adds_features; }; -static int trace_info; - -void perf_header__set_trace_info(void) +void perf_header__feat_trace_info(struct perf_header *header) { - trace_info = 1; + set_bit(HEADER_TRACE_INFO, perf_header__adds_mask(header)); } static void do_write(int fd, void *buf, size_t size) @@ -172,6 +164,32 @@ static void do_write(int fd, void *buf, size_t size) } } +static void perf_header__adds_write(struct perf_header *self, int fd) +{ + struct perf_file_section trace_sec; + u64 cur_offset = lseek(fd, 0, SEEK_CUR); + unsigned long *feat_mask = perf_header__adds_mask(self); + + if (test_bit(HEADER_TRACE_INFO, feat_mask)) { + /* Write trace info */ + trace_sec.offset = lseek(fd, sizeof(trace_sec), SEEK_CUR); + read_tracing_data(fd, attrs, nr_counters); + trace_sec.size = lseek(fd, 0, SEEK_CUR) - trace_sec.offset; + + /* Write trace info headers */ + lseek(fd, cur_offset, SEEK_SET); + do_write(fd, &trace_sec, sizeof(trace_sec)); + + /* + * Update cur_offset. So that other (future) + * features can set their own infos in this place. But if we are + * the only feature, at least that seeks to the place the data + * should begin. + */ + cur_offset = lseek(fd, trace_sec.offset + trace_sec.size, SEEK_SET); + } +}; + void perf_header__write(struct perf_header *self, int fd) { struct perf_file_header f_header; @@ -210,23 +228,7 @@ void perf_header__write(struct perf_header *self, int fd) if (events) do_write(fd, events, self->event_size); - if (trace_info) { - static int trace_info_written; - - /* - * Write it only once - */ - if (!trace_info_written) { - self->trace_info_offset = lseek(fd, 0, SEEK_CUR); - read_tracing_data(fd, attrs, nr_counters); - self->trace_info_size = lseek(fd, 0, SEEK_CUR) - - self->trace_info_offset; - trace_info_written = 1; - } else { - lseek(fd, self->trace_info_offset + - self->trace_info_size, SEEK_SET); - } - } + perf_header__adds_write(self, fd); self->data_offset = lseek(fd, 0, SEEK_CUR); @@ -246,12 +248,10 @@ void perf_header__write(struct perf_header *self, int fd) .offset = self->event_offset, .size = self->event_size, }, - .trace_info = { - .offset = self->trace_info_offset, - .size = self->trace_info_size, - }, }; + memcpy(&f_header.adds_features, &self->adds_features, sizeof(feat_mask_t)); + lseek(fd, 0, SEEK_SET); do_write(fd, &f_header, sizeof(f_header)); lseek(fd, self->data_offset + self->data_size, SEEK_SET); @@ -274,6 +274,20 @@ static void do_read(int fd, void *buf, size_t size) } } +static void perf_header__adds_read(struct perf_header *self, int fd) +{ + const unsigned long *feat_mask = perf_header__adds_mask(self); + + if (test_bit(HEADER_TRACE_INFO, feat_mask)) { + struct perf_file_section trace_sec; + + do_read(fd, &trace_sec, sizeof(trace_sec)); + lseek(fd, trace_sec.offset, SEEK_SET); + trace_report(fd); + lseek(fd, trace_sec.offset + trace_sec.size, SEEK_SET); + } +}; + struct perf_header *perf_header__read(int fd) { struct perf_header *self = perf_header__new(); @@ -292,9 +306,11 @@ struct perf_header *perf_header__read(int fd) if (f_header.size != sizeof(f_header)) { /* Support the previous format */ - if (f_header.size == offsetof(typeof(f_header), trace_info)) - f_header.trace_info.size = 0; - else + if (f_header.size == offsetof(typeof(f_header), adds_features)) { + unsigned long *mask = (unsigned long *)(void *) + &f_header.adds_features; + bitmap_zero(mask, HEADER_FEAT_BITS); + } else die("incompatible file format"); } nr_attrs = f_header.attrs.size / sizeof(f_attr); @@ -330,13 +346,9 @@ struct perf_header *perf_header__read(int fd) event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } - self->trace_info_offset = f_header.trace_info.offset; - self->trace_info_size = f_header.trace_info.size; + memcpy(&self->adds_features, &f_header.adds_features, sizeof(feat_mask_t)); - if (self->trace_info_size) { - lseek(fd, self->trace_info_offset, SEEK_SET); - trace_report(fd); - } + perf_header__adds_read(self, fd); self->event_offset = f_header.event_types.offset; self->event_size = f_header.event_types.size; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 30aee51..0eb4a91 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -12,19 +12,29 @@ struct perf_header_attr { off_t id_offset; }; +#define HEADER_TRACE_INFO 1 + +#define HEADER_FEAT_BITS 256 + +typedef typeof(u64[HEADER_FEAT_BITS / 8]) feat_mask_t; + struct perf_header { - int frozen; - int attrs, size; + int frozen; + int attrs, size; struct perf_header_attr **attr; - s64 attr_offset; - u64 data_offset; - u64 data_size; - u64 event_offset; - u64 event_size; - u64 trace_info_offset; - u64 trace_info_size; + s64 attr_offset; + u64 data_offset; + u64 data_size; + u64 event_offset; + u64 event_size; + feat_mask_t adds_features; }; +static inline unsigned long *perf_header__adds_mask(struct perf_header *self) +{ + return (unsigned long *)(void *)&self->adds_features; +} + struct perf_header *perf_header__read(int fd); void perf_header__write(struct perf_header *self, int fd); @@ -42,7 +52,7 @@ void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header); -void perf_header__set_trace_info(void); +void perf_header__feat_trace_info(struct perf_header *header); struct perf_header *perf_header__new(void); -- cgit v0.10.2 From db9f11e36d0125a5e3e595ea9ef2e4b89f7e8737 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 17 Oct 2009 17:57:18 +0200 Subject: perf tools: Use DECLARE_BITMAP instead of an open-coded array Use DECLARE_BITMAP instead of an open coded array for our bitmap of featured sections. This makes the array an unsigned long instead of a u64 but since we use a 256 bits bitmap, the array size shouldn't vary between different boxes. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt LKML-Reference: <1255795038-13751-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 171d51b..622c60e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -8,8 +8,6 @@ #include "../perf.h" #include "trace-event.h" -#include - /* * Create new perf.data header attribute: */ @@ -143,12 +141,12 @@ struct perf_file_header { struct perf_file_section attrs; struct perf_file_section data; struct perf_file_section event_types; - feat_mask_t adds_features; + DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; void perf_header__feat_trace_info(struct perf_header *header) { - set_bit(HEADER_TRACE_INFO, perf_header__adds_mask(header)); + set_bit(HEADER_TRACE_INFO, header->adds_features); } static void do_write(int fd, void *buf, size_t size) @@ -168,7 +166,7 @@ static void perf_header__adds_write(struct perf_header *self, int fd) { struct perf_file_section trace_sec; u64 cur_offset = lseek(fd, 0, SEEK_CUR); - unsigned long *feat_mask = perf_header__adds_mask(self); + unsigned long *feat_mask = self->adds_features; if (test_bit(HEADER_TRACE_INFO, feat_mask)) { /* Write trace info */ @@ -250,7 +248,7 @@ void perf_header__write(struct perf_header *self, int fd) }, }; - memcpy(&f_header.adds_features, &self->adds_features, sizeof(feat_mask_t)); + memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); lseek(fd, 0, SEEK_SET); do_write(fd, &f_header, sizeof(f_header)); @@ -276,7 +274,7 @@ static void do_read(int fd, void *buf, size_t size) static void perf_header__adds_read(struct perf_header *self, int fd) { - const unsigned long *feat_mask = perf_header__adds_mask(self); + const unsigned long *feat_mask = self->adds_features; if (test_bit(HEADER_TRACE_INFO, feat_mask)) { struct perf_file_section trace_sec; @@ -306,11 +304,9 @@ struct perf_header *perf_header__read(int fd) if (f_header.size != sizeof(f_header)) { /* Support the previous format */ - if (f_header.size == offsetof(typeof(f_header), adds_features)) { - unsigned long *mask = (unsigned long *)(void *) - &f_header.adds_features; - bitmap_zero(mask, HEADER_FEAT_BITS); - } else + if (f_header.size == offsetof(typeof(f_header), adds_features)) + bitmap_zero(f_header.adds_features, HEADER_FEAT_BITS); + else die("incompatible file format"); } nr_attrs = f_header.attrs.size / sizeof(f_attr); @@ -346,7 +342,7 @@ struct perf_header *perf_header__read(int fd) event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } - memcpy(&self->adds_features, &f_header.adds_features, sizeof(feat_mask_t)); + memcpy(&self->adds_features, &f_header.adds_features, sizeof(f_header.adds_features)); perf_header__adds_read(self, fd); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 0eb4a91..2ea9dfb 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -5,6 +5,8 @@ #include #include "types.h" +#include + struct perf_header_attr { struct perf_event_attr attr; int ids, size; @@ -16,8 +18,6 @@ struct perf_header_attr { #define HEADER_FEAT_BITS 256 -typedef typeof(u64[HEADER_FEAT_BITS / 8]) feat_mask_t; - struct perf_header { int frozen; int attrs, size; @@ -27,14 +27,9 @@ struct perf_header { u64 data_size; u64 event_offset; u64 event_size; - feat_mask_t adds_features; + DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; -static inline unsigned long *perf_header__adds_mask(struct perf_header *self) -{ - return (unsigned long *)(void *)&self->adds_features; -} - struct perf_header *perf_header__read(int fd); void perf_header__write(struct perf_header *self, int fd); diff --git a/tools/perf/util/include/asm/asm-offsets.h b/tools/perf/util/include/asm/asm-offsets.h new file mode 100644 index 0000000..ed53894 --- /dev/null +++ b/tools/perf/util/include/asm/asm-offsets.h @@ -0,0 +1 @@ +/* stub */ diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h index ed53894..858a38d 100644 --- a/tools/perf/util/include/linux/types.h +++ b/tools/perf/util/include/linux/types.h @@ -1 +1,7 @@ -/* stub */ +#ifndef _PERF_LINUX_TYPES_H_ +#define _PERF_LINUX_TYPES_H_ + +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +#endif -- cgit v0.10.2 From 1abc7f5500fff8422f34826a006648d8741d83d3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 18 Oct 2009 19:20:24 -0700 Subject: perf tools: Display better error messages on missing packages Check for libelf headers and glibc headers separately so that the error message correctly identifies which package installation is missing/needed. Signed-off-by: Randy Dunlap Cc: paulus@samba.org Cc: a.p.zijlstra@chello.nl Cc: efault@gmx.de Cc: fweisbec@gmail.com Cc: Arnaldo Carvalho de Melo LKML-Reference: <4ADBCCE8.3060300@oracle.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 2400e50..db89a6d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -431,8 +431,12 @@ ifeq ($(uname_S),Darwin) PTHREAD_LIBS = endif +ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) + msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); +endif + ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) - msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); + msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel); endif ifdef NO_DEMANGLE -- cgit v0.10.2 From 12133afffcc7140eea915b1572189a2ea0cf7b0e Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Mon, 19 Oct 2009 12:03:33 +0200 Subject: perf stat: Add branch performance events to default output Adds performance event information about branches and branch misses to the default output of perf stat. Signed-off-by: Tim Blechmann Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <4ADC3975.8050109@klingt.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c373683..95a55ea 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS}, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, }; -- cgit v0.10.2 From 56aab464ff6232bcc2f53b26576983dc83f75db7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2009 13:27:08 +0200 Subject: perf stat: Re-align the default_attrs[] array Clean up the array definition to be vertically aligned. No functional effects. Cc: Tim Blechmann Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <4ADC3975.8050109@klingt.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c373683..95a55ea 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS}, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, }; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 95a55ea..90e0a26 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -50,17 +50,17 @@ static struct perf_event_attr default_attrs[] = { - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS}, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, }; -- cgit v0.10.2 From dd86e72abdbc4b436471af5a97927c6145f5298c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 19 Oct 2009 13:33:03 +0200 Subject: perf stat: Count branches first Count branches first, cache-misses second. The reason is that on x86 branches are not counted by all counters on all CPUs. Before: Performance counter stats for 'ls': 0.756653 task-clock-msecs # 0.802 CPUs 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 250 page-faults # 0.330 M/sec 2375725 cycles # 3139.781 M/sec 1628129 instructions # 0.685 IPC 19643 cache-references # 25.960 M/sec 4608 cache-misses # 6.090 M/sec 342532 branches # 452.694 M/sec branch-misses 0.000943356 seconds time elapsed After: Performance counter stats for 'ls': 1.056734 task-clock-msecs # 0.859 CPUs 0 context-switches # 0.000 M/sec 0 CPU-migrations # 0.000 M/sec 259 page-faults # 0.245 M/sec 3345932 cycles # 3166.295 M/sec 3074090 instructions # 0.919 IPC 616928 branches # 583.806 M/sec 39279 branch-misses # 6.367 % 21312 cache-references # 20.168 M/sec 3661 cache-misses # 3.464 M/sec 0.001230551 seconds time elapsed (also prettify the printout of branch misses, in case it's getting scaled.) Cc: Tim Blechmann Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <4ADC3975.8050109@klingt.org> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c373683..95a55ea 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,6 +59,8 @@ static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS}, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, }; --- tools/perf/builtin-stat.c | 20 ++++++++++---------- 1 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 95a55ea..90e0a26 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -50,17 +50,17 @@ static struct perf_event_attr default_attrs[] = { - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS}, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, }; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 90e0a26..c6df377 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -57,10 +57,10 @@ static struct perf_event_attr default_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, }; @@ -363,7 +363,7 @@ static void abs_printout(int counter, double avg) if (total) ratio = avg * 100 / total; - fprintf(stderr, " # %10.3f %% ", ratio); + fprintf(stderr, " # %10.3f %% ", ratio); } else { total = avg_stats(&runtime_nsecs_stats); -- cgit v0.10.2 From b7f3008ad1d795935551e4dd810b0255a7bfa3c9 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 19 Oct 2009 10:08:50 -0400 Subject: SELinux: fix locking issue introduced with c6d3aaa4e35c71a3 Ensure that we release the policy read lock on all exit paths from security_compute_av. Signed-off-by: Stephen D. Smalley Signed-off-by: James Morris diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index f270e37..77f6e54 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -935,19 +935,22 @@ int security_compute_av(u32 ssid, u32 requested; int rc; + read_lock(&policy_rwlock); + if (!ss_initialized) goto allow; - read_lock(&policy_rwlock); requested = unmap_perm(orig_tclass, orig_requested); tclass = unmap_class(orig_tclass); if (unlikely(orig_tclass && !tclass)) { if (policydb.allow_unknown) goto allow; - return -EINVAL; + rc = -EINVAL; + goto out; } rc = security_compute_av_core(ssid, tsid, tclass, requested, avd); map_decision(orig_tclass, avd, policydb.allow_unknown); +out: read_unlock(&policy_rwlock); return rc; allow: @@ -956,7 +959,8 @@ allow: avd->auditdeny = 0xffffffff; avd->seqno = latest_granting; avd->flags = 0; - return 0; + rc = 0; + goto out; } int security_compute_av_user(u32 ssid, -- cgit v0.10.2 From 20639c15d2e78f180d398a6b6422880fac3258bb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2009 15:11:36 -0200 Subject: perf tools: Add missing tools/perf/util/include/string.h To cure a bunch of: In file included from util/include/linux/bitmap.h:1, from util/header.h:8, from builtin-trace.c:7: util/include/../../../../include/linux/bitmap.h:8:26: error: linux/string.h: No such file or directory make: *** [builtin-trace.o] Error 1 make: *** Waiting for unfinished jobs.... Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra LKML-Reference: <1255972296-11500-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index db89a6d..3b154f1 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -329,6 +329,7 @@ LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h LIB_H += util/include/linux/list.h +LIB_H += util/include/linux/string.h LIB_H += perf.h LIB_H += util/event.h LIB_H += util/types.h diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h new file mode 100644 index 0000000..3b2f590 --- /dev/null +++ b/tools/perf/util/include/linux/string.h @@ -0,0 +1 @@ +#include -- cgit v0.10.2 From 79b9ad361be8c6f3eeea97dd3883e8bcfa989333 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2009 15:31:31 -0200 Subject: perf tools: Add bunch of missing headers to LIB_H Build dependencies were not properly mapped out. Signed-off-by: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <1255973491-11626-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3b154f1..64c6b7b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -328,8 +328,25 @@ LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h +LIB_H += util/include/linux/bitmap.h +LIB_H += util/include/linux/bitops.h +LIB_H += util/include/linux/compiler.h +LIB_H += util/include/linux/ctype.h +LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h +LIB_H += util/include/linux/module.h +LIB_H += util/include/linux/poison.h +LIB_H += util/include/linux/prefetch.h +LIB_H += util/include/linux/rbtree.h LIB_H += util/include/linux/string.h +LIB_H += util/include/linux/types.h +LIB_H += util/include/asm/asm-offsets.h +LIB_H += util/include/asm/bitops.h +LIB_H += util/include/asm/byteorder.h +LIB_H += util/include/asm/swab.h +LIB_H += util/include/asm/system.h +LIB_H += util/include/asm/types.h +LIB_H += util/include/asm/uaccess.h LIB_H += perf.h LIB_H += util/event.h LIB_H += util/types.h -- cgit v0.10.2 From 3e1c2515acf70448cad1ae3ab835ca80be043d33 Mon Sep 17 00:00:00 2001 From: James Morris Date: Tue, 20 Oct 2009 13:48:33 +0900 Subject: security: remove root_plug Remove the root_plug example LSM code. It's unmaintained and increasingly broken in various ways. Made at the 2009 Kernel Summit in Tokyo! Acked-by: Greg Kroah-Hartman Signed-off-by: James Morris diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6fa7292..5d386b4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -85,7 +85,6 @@ parameter is applicable: PPT Parallel port support is enabled. PS2 Appropriate PS/2 support is enabled. RAM RAM disk support is enabled. - ROOTPLUG The example Root Plug LSM is enabled. S390 S390 architecture is enabled. SCSI Appropriate SCSI support is enabled. A lot of drivers has their options described inside of @@ -2163,15 +2162,6 @@ and is between 256 and 4096 characters. It is defined in the file Useful for devices that are detected asynchronously (e.g. USB and MMC devices). - root_plug.vendor_id= - [ROOTPLUG] Override the default vendor ID - - root_plug.product_id= - [ROOTPLUG] Override the default product ID - - root_plug.debug= - [ROOTPLUG] Enable debugging output - rw [KNL] Mount root device read-write on boot S [KNL] Run init in single mode diff --git a/security/Kconfig b/security/Kconfig index fb363cd..aeea8c2 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -100,19 +100,6 @@ config SECURITY_FILE_CAPABILITIES If in doubt, answer N. -config SECURITY_ROOTPLUG - bool "Root Plug Support" - depends on USB=y && SECURITY - help - This is a sample LSM module that should only be used as such. - It prevents any programs running with egid == 0 if a specific - USB device is not present in the system. - - See for - more information about this module. - - If you are unsure how to answer this question, answer N. - config INTEL_TXT bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)" depends on HAVE_INTEL_TXT diff --git a/security/Makefile b/security/Makefile index 95ecc06..bb44e35 100644 --- a/security/Makefile +++ b/security/Makefile @@ -18,7 +18,6 @@ obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o obj-$(CONFIG_AUDIT) += lsm_audit.o obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o -obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o # Object integrity file lists diff --git a/security/commoncap.c b/security/commoncap.c index fe30751..45b87af 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1,4 +1,4 @@ -/* Common capabilities, needed by capability.o and root_plug.o +/* Common capabilities, needed by capability.o. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/security/root_plug.c b/security/root_plug.c deleted file mode 100644 index 2f7ffa6..0000000 --- a/security/root_plug.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Root Plug sample LSM module - * - * Originally written for a Linux Journal. - * - * Copyright (C) 2002 Greg Kroah-Hartman - * - * Prevents any programs running with egid == 0 if a specific USB device - * is not present in the system. Yes, it can be gotten around, but is a - * nice starting point for people to play with, and learn the LSM - * interface. - * - * If you want to turn this into something with a semblance of security, - * you need to hook the task_* functions also. - * - * See http://www.linuxjournal.com/article.php?sid=6279 for more information - * about this code. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2 of the - * License. - */ - -#include -#include -#include -#include -#include - -/* default is a generic type of usb to serial converter */ -static int vendor_id = 0x0557; -static int product_id = 0x2008; - -module_param(vendor_id, uint, 0400); -module_param(product_id, uint, 0400); - -/* should we print out debug messages */ -static int debug = 0; - -module_param(debug, bool, 0600); - -#define MY_NAME "root_plug" - -#define root_dbg(fmt, arg...) \ - do { \ - if (debug) \ - printk(KERN_DEBUG "%s: %s: " fmt , \ - MY_NAME , __func__ , \ - ## arg); \ - } while (0) - -static int rootplug_bprm_check_security (struct linux_binprm *bprm) -{ - struct usb_device *dev; - - root_dbg("file %s, e_uid = %d, e_gid = %d\n", - bprm->filename, bprm->cred->euid, bprm->cred->egid); - - if (bprm->cred->egid == 0) { - dev = usb_find_device(vendor_id, product_id); - if (!dev) { - root_dbg("e_gid = 0, and device not found, " - "task not allowed to run...\n"); - return -EPERM; - } - usb_put_dev(dev); - } - - return 0; -} - -static struct security_operations rootplug_security_ops = { - .bprm_check_security = rootplug_bprm_check_security, -}; - -static int __init rootplug_init (void) -{ - /* register ourselves with the security framework */ - if (register_security (&rootplug_security_ops)) { - printk (KERN_INFO - "Failure registering Root Plug module with the kernel\n"); - return -EINVAL; - } - printk (KERN_INFO "Root Plug module initialized, " - "vendor_id = %4.4x, product id = %4.4x\n", vendor_id, product_id); - return 0; -} - -security_initcall (rootplug_init); -- cgit v0.10.2 From bbe2987bea26a684ff11d887dfc4cf39b22c27a2 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 20 Oct 2009 07:09:39 +0900 Subject: perf timechart: Add a process filter During the Kernel Summit demo of perf/ftrace/timechart, there was a feature request to have a process filter for timechart so that you can zoom into one or a few processes that you are really interested in. This patch adds basic support for this feature, the -p (--process) option now can select a PID or a process name to be shown. Multiple -p options are allowed, and the combined set will be included in the output. Signed-off-by: Arjan van de Ven Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091020070939.7d0fb8a7@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index a791009..4b17883 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -31,9 +31,12 @@ OPTIONS -w:: --width=:: Select the width of the SVG file (default: 1000) --p:: +-P:: --power-only:: Only output the CPU power section of the diagram +-p:: +--process:: + Select the processes to display, by name or PID SEE ALSO diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index e8a510d9..34fad57 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -153,6 +153,17 @@ static struct wake_event *wake_events; struct sample_wrapper *all_samples; + +struct process_filter; +struct process_filter { + char *name; + int pid; + struct process_filter *next; +}; + +static struct process_filter *process_filter; + + static struct per_pid *find_create_pid(int pid) { struct per_pid *cursor = all_data; @@ -763,11 +774,11 @@ static void draw_wakeups(void) c = p->all; while (c) { if (c->Y && c->start_time <= we->time && c->end_time >= we->time) { - if (p->pid == we->waker) { + if (p->pid == we->waker && !from) { from = c->Y; task_from = strdup(c->comm); } - if (p->pid == we->wakee) { + if (p->pid == we->wakee && !to) { to = c->Y; task_to = strdup(c->comm); } @@ -882,12 +893,89 @@ static void draw_process_bars(void) } } +static void add_process_filter(const char *string) +{ + struct process_filter *filt; + int pid; + + pid = strtoull(string, NULL, 10); + filt = malloc(sizeof(struct process_filter)); + if (!filt) + return; + + filt->name = strdup(string); + filt->pid = pid; + filt->next = process_filter; + + process_filter = filt; +} + +static int passes_filter(struct per_pid *p, struct per_pidcomm *c) +{ + struct process_filter *filt; + if (!process_filter) + return 1; + + filt = process_filter; + while (filt) { + if (filt->pid && p->pid == filt->pid) + return 1; + if (strcmp(filt->name, c->comm) == 0) + return 1; + filt = filt->next; + } + return 0; +} + +static int determine_display_tasks_filtered(void) +{ + struct per_pid *p; + struct per_pidcomm *c; + int count = 0; + + p = all_data; + while (p) { + p->display = 0; + if (p->start_time == 1) + p->start_time = first_time; + + /* no exit marker, task kept running to the end */ + if (p->end_time == 0) + p->end_time = last_time; + + c = p->all; + + while (c) { + c->display = 0; + + if (c->start_time == 1) + c->start_time = first_time; + + if (passes_filter(p, c)) { + c->display = 1; + p->display = 1; + count++; + } + + if (c->end_time == 0) + c->end_time = last_time; + + c = c->next; + } + p = p->next; + } + return count; +} + static int determine_display_tasks(u64 threshold) { struct per_pid *p; struct per_pidcomm *c; int count = 0; + if (process_filter) + return determine_display_tasks_filtered(); + p = all_data; while (p) { p->display = 0; @@ -1153,6 +1241,14 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } +static int +parse_process(const struct option *opt __used, const char *arg, int __used unset) +{ + if (arg) + add_process_filter(arg); + return 0; +} + static const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), @@ -1160,8 +1256,11 @@ static const struct option options[] = { "output file name"), OPT_INTEGER('w', "width", &svg_page_width, "page width"), - OPT_BOOLEAN('p', "power-only", &power_only, + OPT_BOOLEAN('P', "power-only", &power_only, "output power data only"), + OPT_CALLBACK('p', "process", NULL, "process", + "process selector. Pass a pid or process name.", + parse_process), OPT_END() }; -- cgit v0.10.2 From ed52ce2e3c33dc7626a40fa2da766d1a6460e543 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 19 Oct 2009 17:17:57 -0200 Subject: perf tools: Add ->unmap_ip operation to struct map We need this because we get section relative addresses when reading the symtabs, but when a tool like 'perf annotate' needs to match these address to what 'objdump -dS' produces we need the address + section back again. So in annotate now we look at the 'struct hist_entry' instances (that weren't really being used) so that we iterate only over the symbols that had some hit and get the map where that particular hit happened so that we can get the right address to match with annotate. Verified that at least: perf annotate mmap_read_counter # Uses the ~/bin/perf binary perf annotate --vmlinux /home/acme/git/build/perf/vmlinux intel_pmu_enable_all on a 'perf record perf top' session seems to work. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1255979877-12533-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 56ba716..06f1027 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -58,9 +58,12 @@ static void hist_hit(struct hist_entry *he, u64 ip) return; sym_size = sym->end - sym->start; - ip = he->map->map_ip(he->map, ip); offset = ip - sym->start; + if (verbose) + fprintf(stderr, "%s: ip=%Lx\n", __func__, + he->map->unmap_ip(he->map, ip)); + if (offset >= sym_size) return; @@ -83,8 +86,7 @@ static int hist_entry__add(struct thread *thread, struct map *map, count, level, &hit); if (he == NULL) return -ENOMEM; - if (hit) - hist_hit(he, ip); + hist_hit(he, ip); return 0; } @@ -260,14 +262,15 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static int -parse_line(FILE *file, struct symbol *sym, u64 len) +static int parse_line(FILE *file, struct hist_entry *he, u64 len) { + struct symbol *sym = he->sym; char *line = NULL, *tmp, *tmp2; static const char *prev_line; static const char *prev_color; unsigned int offset; size_t line_len; + u64 start; s64 line_ip; int ret; char *c; @@ -304,6 +307,8 @@ parse_line(FILE *file, struct symbol *sym, u64 len) line_ip = -1; } + start = he->map->unmap_ip(he->map, sym->start); + if (line_ip != -1) { const char *path = NULL; unsigned int hits = 0; @@ -311,7 +316,7 @@ parse_line(FILE *file, struct symbol *sym, u64 len) const char *color; struct sym_ext *sym_ext = sym->priv; - offset = line_ip - sym->start; + offset = line_ip - start; if (offset < len) hits = sym->hist[offset]; @@ -390,8 +395,10 @@ static void free_source_line(struct symbol *sym, int len) /* Get the filename:line for the colored entries */ static void -get_source_line(struct symbol *sym, int len, const char *filename) +get_source_line(struct hist_entry *he, int len, const char *filename) { + struct symbol *sym = he->sym; + u64 start; int i; char cmd[PATH_MAX * 2]; struct sym_ext *sym_ext; @@ -404,6 +411,7 @@ get_source_line(struct symbol *sym, int len, const char *filename) return; sym_ext = sym->priv; + start = he->map->unmap_ip(he->map, sym->start); for (i = 0; i < len; i++) { char *path = NULL; @@ -415,7 +423,7 @@ get_source_line(struct symbol *sym, int len, const char *filename) if (sym_ext[i].percent <= 0.5) continue; - offset = sym->start + i; + offset = start + i; sprintf(cmd, "addr2line -e %s %016llx", filename, offset); fp = popen(cmd, "r"); if (!fp) @@ -465,8 +473,11 @@ static void print_summary(const char *filename) } } -static void annotate_sym(struct dso *dso, struct symbol *sym) +static void annotate_sym(struct hist_entry *he) { + struct map *map = he->map; + struct dso *dso = map->dso; + struct symbol *sym = he->sym; const char *filename = dso->long_name, *d_filename; u64 len; char command[PATH_MAX*2]; @@ -475,6 +486,12 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) if (!filename) return; + if (verbose) + fprintf(stderr, "%s: filename=%s, sym=%s, start=%Lx, end=%Lx\n", + __func__, filename, sym->name, + map->unmap_ip(map, sym->start), + map->unmap_ip(map, sym->end)); + if (full_paths) d_filename = filename; else @@ -483,7 +500,7 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) len = sym->end - sym->start; if (print_line) { - get_source_line(sym, len, filename); + get_source_line(he, len, filename); print_summary(filename); } @@ -496,7 +513,8 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) dso, dso->long_name, sym, sym->name); sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s|grep -v %s", - sym->start, sym->end, filename, filename); + map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end), + filename, filename); if (verbose >= 3) printf("doing: %s\n", command); @@ -506,7 +524,7 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) return; while (!feof(file)) { - if (parse_line(file, sym, len) < 0) + if (parse_line(file, he, len) < 0) break; } @@ -518,18 +536,22 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) static void find_annotations(void) { struct rb_node *nd; - struct dso *dso; int count = 0; - list_for_each_entry(dso, &dsos, node) { + for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { + struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) { - struct symbol *sym = rb_entry(nd, struct symbol, rb_node); + if (he->sym && he->sym->hist) { + annotate_sym(he); + count++; + /* + * Since we have a hist_entry per IP for the same + * symbol, free he->sym->hist to signal we already + * processed this symbol. + */ + free(he->sym->hist); + he->sym->hist = NULL; - if (sym->hist) { - annotate_sym(dso, sym); - count++; - } } } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c2e62be..6b5be56 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -82,6 +82,7 @@ struct map { u64 end; u64 pgoff; u64 (*map_ip)(struct map *, u64); + u64 (*unmap_ip)(struct map *, u64); struct dso *dso; }; @@ -90,7 +91,12 @@ static inline u64 map__map_ip(struct map *map, u64 ip) return ip - map->start + map->pgoff; } -static inline u64 vdso__map_ip(struct map *map __used, u64 ip) +static inline u64 map__unmap_ip(struct map *map, u64 ip) +{ + return ip + map->start - map->pgoff; +} + +static inline u64 identity__map_ip(struct map *map __used, u64 ip) { return ip; } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 804e023..4e203d1 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -54,9 +54,11 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) goto out_delete; if (self->dso == vdso || anon) - self->map_ip = vdso__map_ip; - else + self->map_ip = self->unmap_ip = identity__map_ip; + else { self->map_ip = map__map_ip; + self->unmap_ip = map__unmap_ip; + } } return self; out_delete: diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index faa84f5..3350119 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -337,7 +337,7 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) return -1; } - map->map_ip = vdso__map_ip; + map->map_ip = map->unmap_ip = identity__map_ip; kernel_maps__insert(map); ++kernel_range; } @@ -790,7 +790,8 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, dso__delete(curr_dso); goto out_elf_end; } - curr_map->map_ip = vdso__map_ip; + curr_map->map_ip = identity__map_ip; + curr_map->unmap_ip = identity__map_ip; curr_dso->origin = DSO__ORIG_KERNEL; kernel_maps__insert(curr_map); dsos__add(curr_dso); @@ -1158,6 +1159,7 @@ static struct map *map__new2(u64 start, struct dso *dso) self->pgoff = 0; self->dso = dso; self->map_ip = map__map_ip; + self->unmap_ip = map__unmap_ip; RB_CLEAR_NODE(&self->rb_node); } return self; @@ -1259,7 +1261,7 @@ int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, if (kernel_map == NULL) goto out_delete_dso; - kernel_map->map_ip = vdso__map_ip; + kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; if (use_modules && dsos__load_modules(sym_priv_size) < 0) { fprintf(stderr, "Failed to load list of modules in use! " -- cgit v0.10.2 From e42049926ebdcae24fdfdc8f0e3ff8f05f24a60b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2009 14:25:40 -0200 Subject: perf annotate: Use the sym_priv_size area for the histogram We have this sym_priv_size mechanism for attaching private areas to struct symbol entries but annotate wasn't using it, adding private areas to struct symbol in addition to a ->priv pointer. Scrap all that and use the sym_priv_size mechanism. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256055940-19511-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 06f1027..2456925 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -37,12 +37,44 @@ static int print_line; static unsigned long page_size; static unsigned long mmap_window = 32; +struct sym_hist { + u64 sum; + u64 ip[0]; +}; + struct sym_ext { struct rb_node node; double percent; char *path; }; +struct sym_priv { + struct sym_hist *hist; + struct sym_ext *ext; +}; + +static const char *sym_hist_filter; + +static int symbol_filter(struct map *map, struct symbol *sym) +{ + if (strcmp(sym->name, sym_hist_filter) == 0) { + struct sym_priv *priv = dso__sym_priv(map->dso, sym); + const int size = (sizeof(*priv->hist) + + (sym->end - sym->start) * sizeof(u64)); + + priv->hist = malloc(size); + if (priv->hist) + memset(priv->hist, 0, size); + return 0; + } + /* + * FIXME: We should really filter it out, as we don't want to go thru symbols + * we're not interested, and if a DSO ends up with no symbols, delete it too, + * but right now the kernel loading routines in symbol.c bail out if no symbols + * are found, fix it later. + */ + return 0; +} /* * collect histogram counts @@ -51,10 +83,16 @@ static void hist_hit(struct hist_entry *he, u64 ip) { unsigned int sym_size, offset; struct symbol *sym = he->sym; + struct sym_priv *priv; + struct sym_hist *h; he->count++; - if (!sym || !sym->hist) + if (!sym || !he->map) + return; + + priv = dso__sym_priv(he->map->dso, sym); + if (!priv->hist) return; sym_size = sym->end - sym->start; @@ -67,15 +105,16 @@ static void hist_hit(struct hist_entry *he, u64 ip) if (offset >= sym_size) return; - sym->hist_sum++; - sym->hist[offset]++; + h = priv->hist; + h->sum++; + h->ip[offset]++; if (verbose >= 3) printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n", (void *)(unsigned long)he->sym->start, he->sym->name, (void *)(unsigned long)ip, ip - he->sym->start, - sym->hist[offset]); + h->ip[offset]); } static int hist_entry__add(struct thread *thread, struct map *map, @@ -162,7 +201,9 @@ got_map: static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct map *map = map__new(&event->mmap, NULL, 0); + struct map *map = map__new(&event->mmap, NULL, 0, + sizeof(struct sym_priv), symbol_filter, + verbose); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", @@ -314,17 +355,19 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len) unsigned int hits = 0; double percent = 0.0; const char *color; - struct sym_ext *sym_ext = sym->priv; + struct sym_priv *priv = dso__sym_priv(he->map->dso, sym); + struct sym_ext *sym_ext = priv->ext; + struct sym_hist *h = priv->hist; offset = line_ip - start; if (offset < len) - hits = sym->hist[offset]; + hits = h->ip[offset]; if (offset < len && sym_ext) { path = sym_ext[offset].path; percent = sym_ext[offset].percent; - } else if (sym->hist_sum) - percent = 100.0 * hits / sym->hist_sum; + } else if (h->sum) + percent = 100.0 * hits / h->sum; color = get_percent_color(percent); @@ -377,9 +420,10 @@ static void insert_source_line(struct sym_ext *sym_ext) rb_insert_color(&sym_ext->node, &root_sym_ext); } -static void free_source_line(struct symbol *sym, int len) +static void free_source_line(struct hist_entry *he, int len) { - struct sym_ext *sym_ext = sym->priv; + struct sym_priv *priv = dso__sym_priv(he->map->dso, he->sym); + struct sym_ext *sym_ext = priv->ext; int i; if (!sym_ext) @@ -389,7 +433,7 @@ static void free_source_line(struct symbol *sym, int len) free(sym_ext[i].path); free(sym_ext); - sym->priv = NULL; + priv->ext = NULL; root_sym_ext = RB_ROOT; } @@ -402,15 +446,16 @@ get_source_line(struct hist_entry *he, int len, const char *filename) int i; char cmd[PATH_MAX * 2]; struct sym_ext *sym_ext; + struct sym_priv *priv = dso__sym_priv(he->map->dso, sym); + struct sym_hist *h = priv->hist; - if (!sym->hist_sum) + if (!h->sum) return; - sym->priv = calloc(len, sizeof(struct sym_ext)); - if (!sym->priv) + sym_ext = priv->ext = calloc(len, sizeof(struct sym_ext)); + if (!priv->ext) return; - sym_ext = sym->priv; start = he->map->unmap_ip(he->map, sym->start); for (i = 0; i < len; i++) { @@ -419,7 +464,7 @@ get_source_line(struct hist_entry *he, int len, const char *filename) u64 offset; FILE *fp; - sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum; + sym_ext[i].percent = 100.0 * h->ip[i] / h->sum; if (sym_ext[i].percent <= 0.5) continue; @@ -530,7 +575,7 @@ static void annotate_sym(struct hist_entry *he) pclose(file); if (print_line) - free_source_line(sym, len); + free_source_line(he, len); } static void find_annotations(void) @@ -540,19 +585,23 @@ static void find_annotations(void) for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); + struct sym_priv *priv; - if (he->sym && he->sym->hist) { - annotate_sym(he); - count++; - /* - * Since we have a hist_entry per IP for the same - * symbol, free he->sym->hist to signal we already - * processed this symbol. - */ - free(he->sym->hist); - he->sym->hist = NULL; + if (he->sym == NULL) + continue; - } + priv = dso__sym_priv(he->map->dso, he->sym); + if (priv->hist == NULL) + continue; + + annotate_sym(he); + count++; + /* + * Since we have a hist_entry per IP for the same symbol, free + * he->sym->hist to signal we already processed this symbol. + */ + free(priv->hist); + priv->hist = NULL; } if (!count) @@ -593,7 +642,7 @@ static int __cmd_annotate(void) exit(0); } - if (load_kernel() < 0) { + if (load_kernel(sizeof(struct sym_priv), symbol_filter) < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a4f8cc2..bee207c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -680,7 +680,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct map *map = map__new(&event->mmap, cwd, cwdlen); + struct map *map = map__new(&event->mmap, cwd, cwdlen, 0, NULL, verbose); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index 242b055..18accb8 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -130,7 +130,7 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, if (curr_handler->sample_type_check(sample_type) < 0) exit(-1); - if (load_kernel() < 0) { + if (load_kernel(0, NULL) < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 6b5be56..db59c8b 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -101,7 +101,13 @@ static inline u64 identity__map_ip(struct map *map __used, u64 ip) return ip; } -struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); +struct symbol; + +typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); + +struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, + unsigned int sym_priv_size, symbol_filter_t filter, + int v); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 4e203d1..55079c0 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -3,6 +3,7 @@ #include #include #include +#include "debug.h" static inline int is_anon_memory(const char *filename) { @@ -19,7 +20,9 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) return n; } - struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen) +struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, + unsigned int sym_priv_size, symbol_filter_t filter, + int v) { struct map *self = malloc(sizeof(*self)); @@ -27,6 +30,7 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) const char *filename = event->filename; char newfilename[PATH_MAX]; int anon; + bool new_dso; if (cwd) { int n = strcommon(filename, cwd, cwdlen); @@ -49,10 +53,23 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) self->end = event->start + event->len; self->pgoff = event->pgoff; - self->dso = dsos__findnew(filename); + self->dso = dsos__findnew(filename, sym_priv_size, &new_dso); if (self->dso == NULL) goto out_delete; + if (new_dso) { + int nr = dso__load(self->dso, self, filter, v); + + if (nr < 0) + eprintf("Failed to open %s, continuing " + "without symbols\n", + self->dso->long_name); + else if (nr == 0) + eprintf("No symbols found in %s, maybe " + "install a debug package?\n", + self->dso->long_name); + } + if (self->dso == vdso || anon) self->map_ip = self->unmap_ip = identity__map_ip; else { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 3350119..0a48984 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -11,8 +11,6 @@ #include #include -const char *sym_hist_filter; - enum dso_origin { DSO__ORIG_KERNEL = 0, DSO__ORIG_JAVA_JIT, @@ -86,22 +84,16 @@ static struct symbol *symbol__new(u64 start, u64 len, const char *name, if (!self) return NULL; - if (v > 2) - printf("new symbol: %016Lx [%08lx]: %s, hist: %p\n", - start, (unsigned long)len, name, self->hist); - - self->hist = NULL; - self->hist_sum = 0; - - if (sym_hist_filter && !strcmp(name, sym_hist_filter)) - self->hist = calloc(sizeof(u64), len); - if (priv_size) { memset(self, 0, priv_size); self = ((void *)self) + priv_size; } self->start = start; self->end = len ? start + len - 1 : start; + + if (v > 2) + printf("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end); + memcpy(self->name, name, namelen); return self; @@ -919,7 +911,8 @@ char dso__symtab_origin(const struct dso *self) return origin[self->origin]; } -int dso__load(struct dso *self, struct map *map, symbol_filter_t filter, int v) +int dso__load(struct dso *self, struct map *map, + symbol_filter_t filter, int v) { int size = PATH_MAX; char *name = malloc(size), *build_id = NULL; @@ -1335,33 +1328,21 @@ static struct dso *dsos__find(const char *name) return NULL; } -struct dso *dsos__findnew(const char *name) +struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size, + bool *is_new) { struct dso *dso = dsos__find(name); - int nr; - - if (dso) - return dso; - - dso = dso__new(name, 0); - if (!dso) - goto out_delete_dso; - nr = dso__load(dso, NULL, NULL, verbose); - if (nr < 0) { - eprintf("Failed to open: %s\n", name); - goto out_delete_dso; - } - if (!nr) - eprintf("No symbols found in: %s, maybe install a debug package?\n", name); - - dsos__add(dso); + if (!dso) { + dso = dso__new(name, sym_priv_size); + if (dso) { + dsos__add(dso); + *is_new = true; + } + } else + *is_new = false; return dso; - -out_delete_dso: - dso__delete(dso); - return NULL; } void dsos__fprintf(FILE *fp) @@ -1372,9 +1353,10 @@ void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -int load_kernel(void) +int load_kernel(unsigned int sym_priv_size, symbol_filter_t filter) { - if (dsos__load_kernel(vmlinux_name, 0, NULL, verbose, modules) <= 0) + if (dsos__load_kernel(vmlinux_name, sym_priv_size, + filter, verbose, modules) <= 0) return -1; vdso = dso__new("[vdso]", 0); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2e4522e..c2a777d 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -2,6 +2,7 @@ #define __PERF_SYMBOL 1 #include +#include #include "types.h" #include #include @@ -35,9 +36,6 @@ struct symbol { struct rb_node rb_node; u64 start; u64 end; - u64 hist_sum; - u64 *hist; - void *priv; char name[0]; }; @@ -54,10 +52,6 @@ struct dso { char name[0]; }; -extern const char *sym_hist_filter; - -typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); - struct dso *dso__new(const char *name, unsigned int sym_priv_size); void dso__delete(struct dso *self); @@ -70,15 +64,16 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, symbol_filter_t filter, int verbose, int modules); -int dso__load(struct dso *self, struct map *map, symbol_filter_t filter, - int verbose); -struct dso *dsos__findnew(const char *name); +struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size, + bool *is_new); +int dso__load(struct dso *self, struct map *map, + symbol_filter_t filter, int v); void dsos__fprintf(FILE *fp); size_t dso__fprintf(struct dso *self, FILE *fp); char dso__symtab_origin(const struct dso *self); -int load_kernel(void); +int load_kernel(unsigned int sym_priv_size, symbol_filter_t filter); void symbol__init(void); -- cgit v0.10.2 From 8f0b037398a909ccf703ad5f5803066db6327f22 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2009 15:08:29 -0200 Subject: perf annotate: Remove requirement of passing a symbol name If the user doesn't pass a symbol name to annotate, it will annotate all the symbols that have hits, in order, just like 'perf report -s comm,dso,symbol'. This is a natural followup patch to the one that uses output_hists to find the symbols with hits. The common case is to annotate the first few entries at the top of a perf report, so lets type less characters. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256058509-19678-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2456925..99bac6a 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -57,7 +57,8 @@ static const char *sym_hist_filter; static int symbol_filter(struct map *map, struct symbol *sym) { - if (strcmp(sym->name, sym_hist_filter) == 0) { + if (sym_hist_filter == NULL || + strcmp(sym->name, sym_hist_filter) == 0) { struct sym_priv *priv = dso__sym_priv(map->dso, sym); const int size = (sizeof(*priv->hist) + (sym->end - sym->start) * sizeof(u64)); @@ -581,7 +582,6 @@ static void annotate_sym(struct hist_entry *he) static void find_annotations(void) { struct rb_node *nd; - int count = 0; for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); @@ -595,7 +595,6 @@ static void find_annotations(void) continue; annotate_sym(he); - count++; /* * Since we have a hist_entry per IP for the same symbol, free * he->sym->hist to signal we already processed this symbol. @@ -603,9 +602,6 @@ static void find_annotations(void) free(priv->hist); priv->hist = NULL; } - - if (!count) - printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter); } static int __cmd_annotate(void) @@ -793,9 +789,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) sym_hist_filter = argv[0]; } - if (!sym_hist_filter) - usage_with_options(annotate_usage, options); - setup_pager(); if (field_sep && *field_sep == '.') { -- cgit v0.10.2 From c88e4bf60de6253a048cf4e6b3b0715e543e0460 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 20 Oct 2009 15:54:55 -0200 Subject: perf top: Fix symbol annotation We need to use map->unmap_ip() here too to match section relative symbol address to the absolute address needed to match objdump -dS addresses. Reported-by: Mike Galbraith Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1256061295-19835-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index cc66286..fa20345 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -141,7 +141,8 @@ static void parse_source(struct sym_entry *syme) sprintf(command, "objdump --start-address=0x%016Lx " "--stop-address=0x%016Lx -dS %s", - sym->start, sym->end, path); + map->unmap_ip(map, sym->start), + map->unmap_ip(map, sym->end), path); file = popen(command, "r"); if (!file) @@ -173,11 +174,11 @@ static void parse_source(struct sym_entry *syme) if (strlen(src->line)>8 && src->line[8] == ':') { src->eip = strtoull(src->line, NULL, 16); - src->eip += map->start; + src->eip = map->unmap_ip(map, src->eip); } if (strlen(src->line)>8 && src->line[16] == ':') { src->eip = strtoull(src->line, NULL, 16); - src->eip += map->start; + src->eip = map->unmap_ip(map, src->eip); } } pclose(file); -- cgit v0.10.2 From 06ed6ba5ecb771cc3a967838a4bb1d9cbd8786b9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 20 Oct 2009 12:55:24 -0400 Subject: x86: Fix group attribute decoding bug Fix a typo in inat_get_group_attribute() which should refer inat_group_tables, not inat_escape_tables. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091020165524.4145.97333.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 054656a..3fb5998 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -68,7 +68,7 @@ insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, if (!table) return inat_group_common_attribute(grp_attr); if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) { - table = inat_escape_tables[n][m]; + table = inat_group_tables[n][m]; if (!table) return inat_group_common_attribute(grp_attr); } -- cgit v0.10.2 From 9983d60d74db9e544c6cb6f65351849fe8e9c1de Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 20 Oct 2009 12:55:31 -0400 Subject: x86: Add AES opcodes to opcode map Add Intel AES opcodes to x86 opcode map. These opcodes are used in arch/x86/crypt/aesni-intel_asm.S. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091020165531.4145.21872.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 894497f..701c467 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -567,7 +567,7 @@ fe: paddd Pq,Qq | paddd Vdq,Wdq (66) ff: EndTable -Table: 3-byte opcode 1 +Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 # 0x0f 0x38 0x00-0x0f 00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) @@ -642,11 +642,16 @@ Referrer: 3-byte escape 1 41: phminposuw Vdq,Wdq (66) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) +db: aesimc Vdq,Wdq (66) +dc: aesenc Vdq,Wdq (66) +dd: aesenclast Vdq,Wdq (66) +de: aesdec Vdq,Wdq (66) +df: aesdeclast Vdq,Wdq (66) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) EndTable -Table: 3-byte opcode 2 +Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 # 0x0f 0x3a 0x00-0xff 08: roundps Vdq,Wdq,Ib (66) @@ -671,6 +676,7 @@ Referrer: 3-byte escape 2 61: pcmpestri Vdq,Wdq,Ib (66) 62: pcmpistrm Vdq,Wdq,Ib (66) 63: pcmpistri Vdq,Wdq,Ib (66) +df: aeskeygenassist Vdq,Wdq,Ib (66) EndTable GrpTable: Grp1 -- cgit v0.10.2 From 60d526f7fa6246b8e32d5b45610d625a5608d988 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 20 Oct 2009 19:19:34 -0400 Subject: perf tools: Add 'make DEBUG=1' to remove the -O6 cflag When using gdb to debug perf, it is practically impossible to use when perf is compiled with -O6. For developers, this patch adds the DEBUG feature to the make command line so that a developer can easily remove the optimization flag. LKML-Reference: <1255590330.8392.446.camel@twins> Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091020232033.984323261@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 64c6b7b..65e6e52 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -201,7 +201,14 @@ EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wold-style-definition EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wstrict-prototypes EXTRA_WARNINGS := $(EXTRA_WARNINGS) -Wdeclaration-after-statement -CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) +ifeq ("$(origin DEBUG)", "command line") + PERF_DEBUG = $(DEBUG) +endif +ifndef PERF_DEBUG + CFLAGS_OPTIMIZE = -O6 +endif + +CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) -- cgit v0.10.2 From 4e3b799d7dbb2a12ca8dca8d3594d32095772973 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 20 Oct 2009 19:19:35 -0400 Subject: perf tools: Use strsep() over strtok_r() for parsing single line The second argument in the strtok_r() function is not to be used generically and can have different implementations. Currently the function parsing of the perf trace code uses the second argument to copy data from. This can crash the tool or just have unpredictable results. The correct solution is to use strsep() which has a defined result. I also added a check to see if the result was correct, and will break out of the loop in case it fails to parse as expected. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Cc: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <20091020232034.237814877@goodmis.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 4b61b49..eae5605 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -286,16 +286,19 @@ void parse_ftrace_printk(char *file, unsigned int size __unused) char *line; char *next = NULL; char *addr_str; - char *fmt; int i; line = strtok_r(file, "\n", &next); while (line) { + addr_str = strsep(&line, ":"); + if (!line) { + warning("error parsing print strings"); + break; + } item = malloc_or_die(sizeof(*item)); - addr_str = strtok_r(line, ":", &fmt); item->addr = strtoull(addr_str, NULL, 16); /* fmt still has a space, skip it */ - item->printk = strdup(fmt+1); + item->printk = strdup(line+1); item->next = list; list = item; line = strtok_r(NULL, "\n", &next); -- cgit v0.10.2 From 9bf4e7fba8006d19846fec877b6da0616b2772de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 21 Oct 2009 14:39:51 +0200 Subject: x86, instruction decoder: Fix test_get_len build rules Add the kernel source include file as well to the include files search path, to fix this build bug: In file included from arch/x86/tools/test_get_len.c:28: arch/x86/lib/insn.c:21:26: error: linux/string.h: No such file or directory Cc: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Jim Keniston Cc: Frederic Weisbecker LKML-Reference: <20091020165531.4145.21872.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 1bd006c..5e295d9 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -8,8 +8,8 @@ posttest: $(obj)/test_get_len vmlinux hostprogs-y := test_get_len # -I needed for generated C source and C source which in the kernel tree. -HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ +HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/ -# Dependancies are also needed. +# Dependencies are also needed. $(obj)/test_get_len.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c -- cgit v0.10.2 From af0a6fa46388e1e0c2d1a672aad84f8f6ef0b20b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 22 Oct 2009 23:23:22 +0200 Subject: perf tools: Fix missing top level callchain While recursively printing the branches of each callchains, we forget to display the root. It is never printed. Say we have: symbol f1 f2 | -------- f3 | f4 | ---------f5 f6 Actually we never see that, instead it displays: symbol | --------- f3 | f4 | --------- f5 f6 However f1 is always the same than "symbol" and if we are sorting by symbols first then "symbol", f1 and f2 will be well aligned like in the above example, so displaying f1 looks redundant here. But if we are sorting by something else first (dso, comm, etc...), displaying f1 doesn't look redundant but rather necessary because the symbol is not well aligned anymore with its callchain: comm dso symbol f1 f2 | --------- [...] And we want the callchain to be obvious. So we fix the bug by printing the root branch, but we also filter its first entry if we are sorting by symbols first. Reported-by: Anton Blanchard Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1256246604-17156-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bee207c..3d8c522 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -122,8 +122,8 @@ static void init_rem_hits(void) } static size_t -callchain__fprintf_graph(FILE *fp, struct callchain_node *self, - u64 total_samples, int depth, int depth_mask) +__callchain__fprintf_graph(FILE *fp, struct callchain_node *self, + u64 total_samples, int depth, int depth_mask) { struct rb_node *node, *next; struct callchain_node *child; @@ -174,9 +174,9 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, new_total, cumul); } - ret += callchain__fprintf_graph(fp, child, new_total, - depth + 1, - new_depth_mask | (1 << depth)); + ret += __callchain__fprintf_graph(fp, child, new_total, + depth + 1, + new_depth_mask | (1 << depth)); node = next; } @@ -197,6 +197,33 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, } static size_t +callchain__fprintf_graph(FILE *fp, struct callchain_node *self, + u64 total_samples) +{ + struct callchain_list *chain; + int i = 0; + int ret = 0; + + list_for_each_entry(chain, &self->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + + if (!i++ && sort_by_sym_first) + continue; + + if (chain->sym) + ret += fprintf(fp, " %s\n", chain->sym->name); + else + ret += fprintf(fp, " %p\n", + (void *)(long)chain->ip); + } + + ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1); + + return ret; +} + +static size_t callchain__fprintf_flat(FILE *fp, struct callchain_node *self, u64 total_samples) { @@ -244,8 +271,7 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, break; case CHAIN_GRAPH_ABS: /* Falldown */ case CHAIN_GRAPH_REL: - ret += callchain__fprintf_graph(fp, chain, - total_samples, 1, 1); + ret += callchain__fprintf_graph(fp, chain, total_samples); case CHAIN_NONE: default: break; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 40c9acd..60ced70 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -5,8 +5,9 @@ char default_parent_pattern[] = "^sys_|^do_page_fault"; char *parent_pattern = default_parent_pattern; char default_sort_order[] = "comm,dso,symbol"; char *sort_order = default_sort_order; -int sort__need_collapse = 0; -int sort__has_parent = 0; +int sort__need_collapse = 0; +int sort__has_parent = 0; +int sort_by_sym_first; unsigned int dsos__col_width; unsigned int comms__col_width; @@ -265,6 +266,10 @@ int sort_dimension__add(const char *tok) sort__has_parent = 1; } + if (list_empty(&hist_entry__sort_list) && + !strcmp(sd->name, "symbol")) + sort_by_sym_first = true; + list_add_tail(&sd->entry->list, &hist_entry__sort_list); sd->taken = 1; @@ -273,4 +278,3 @@ int sort_dimension__add(const char *tok) return -ESRCH; } - diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 13806d7..24c2b70 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -39,6 +39,7 @@ extern struct sort_entry sort_parent; extern unsigned int dsos__col_width; extern unsigned int comms__col_width; extern unsigned int threads__col_width; +extern int sort_by_sym_first; struct hist_entry { struct rb_node rb_node; -- cgit v0.10.2 From a4fb581b15949cfd10b64c8af37bc106e95307f3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 22 Oct 2009 23:23:23 +0200 Subject: perf tools: Bind callchains to the first sort dimension column Currently, the callchains are displayed using a constant left margin. So depending on the current sort dimension configuration, callchains may appear to be well attached to the first sort dimension column field which is mostly the case, except when the first dimension of sorting is done by comm, because these are right aligned. This patch binds the callchain to the first letter in the first column, whatever type of column it is (dso, comm, symbol). Before: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent After: 0.80% perf [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify | | __fsnotify_parent Also, for clarity, we don't put anymore the callchain as is but: - If we have a top level ancestor in the callchain, start it with a first ascii hook. Before: 0.80% perf [kernel] [k] __lock_acquire __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] After: 0.80% perf [kernel] [k] __lock_acquire | --- __lock_acquire lock_acquire | |--58.33%-- _spin_lock | | | |--28.57%-- inotify_should_send_event | | fsnotify [..] [..] - Otherwise, if we have several top level ancestors, then display these like we did before: 1.69% Xorg | |--21.21%-- vread_hpet | 0x7fffd85b46fc | 0x7fffd85b494d | 0x7f4fafb4e54d | |--15.15%-- exaOffscreenAlloc | |--9.09%-- I830WaitLpRing Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard LKML-Reference: <1256246604-17156-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 3d8c522..72d5842 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,12 +59,28 @@ static struct perf_header *header; static u64 sample_type; -static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask) + +static size_t +callchain__fprintf_left_margin(FILE *fp, int left_margin) +{ + int i; + int ret; + + ret = fprintf(fp, " "); + + for (i = 0; i < left_margin; i++) + ret += fprintf(fp, " "); + + return ret; +} + +static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, + int left_margin) { int i; size_t ret = 0; - ret += fprintf(fp, "%s", " "); + ret += callchain__fprintf_left_margin(fp, left_margin); for (i = 0; i < depth; i++) if (depth_mask & (1 << i)) @@ -79,12 +95,12 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask) static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth, int depth_mask, int count, u64 total_samples, - int hits) + int hits, int left_margin) { int i; size_t ret = 0; - ret += fprintf(fp, "%s", " "); + ret += callchain__fprintf_left_margin(fp, left_margin); for (i = 0; i < depth; i++) { if (depth_mask & (1 << i)) ret += fprintf(fp, "|"); @@ -123,7 +139,8 @@ static void init_rem_hits(void) static size_t __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, - u64 total_samples, int depth, int depth_mask) + u64 total_samples, int depth, int depth_mask, + int left_margin) { struct rb_node *node, *next; struct callchain_node *child; @@ -164,7 +181,8 @@ __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, * But we keep the older depth mask for the line seperator * to keep the level link until we reach the last child */ - ret += ipchain__fprintf_graph_line(fp, depth, depth_mask); + ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, + left_margin); i = 0; list_for_each_entry(chain, &child->val, list) { if (chain->ip >= PERF_CONTEXT_MAX) @@ -172,11 +190,13 @@ __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, ret += ipchain__fprintf_graph(fp, chain, depth, new_depth_mask, i++, new_total, - cumul); + cumul, + left_margin); } ret += __callchain__fprintf_graph(fp, child, new_total, depth + 1, - new_depth_mask | (1 << depth)); + new_depth_mask | (1 << depth), + left_margin); node = next; } @@ -190,17 +210,19 @@ __callchain__fprintf_graph(FILE *fp, struct callchain_node *self, ret += ipchain__fprintf_graph(fp, &rem_hits, depth, new_depth_mask, 0, new_total, - remaining); + remaining, left_margin); } return ret; } + static size_t callchain__fprintf_graph(FILE *fp, struct callchain_node *self, - u64 total_samples) + u64 total_samples, int left_margin) { struct callchain_list *chain; + bool printed = false; int i = 0; int ret = 0; @@ -208,17 +230,27 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self, if (chain->ip >= PERF_CONTEXT_MAX) continue; - if (!i++ && sort_by_sym_first) + if (!i++ && sort__first_dimension == SORT_SYM) continue; + if (!printed) { + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "|\n"); + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "---"); + + left_margin += 3; + printed = true; + } else + ret += callchain__fprintf_left_margin(fp, left_margin); + if (chain->sym) - ret += fprintf(fp, " %s\n", chain->sym->name); + ret += fprintf(fp, " %s\n", chain->sym->name); else - ret += fprintf(fp, " %p\n", - (void *)(long)chain->ip); + ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); } - ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1); + ret += __callchain__fprintf_graph(fp, self, total_samples, 1, 1, left_margin); return ret; } @@ -251,7 +283,7 @@ callchain__fprintf_flat(FILE *fp, struct callchain_node *self, static size_t hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, - u64 total_samples) + u64 total_samples, int left_margin) { struct rb_node *rb_node; struct callchain_node *chain; @@ -271,7 +303,8 @@ hist_entry_callchain__fprintf(FILE *fp, struct hist_entry *self, break; case CHAIN_GRAPH_ABS: /* Falldown */ case CHAIN_GRAPH_REL: - ret += callchain__fprintf_graph(fp, chain, total_samples); + ret += callchain__fprintf_graph(fp, chain, total_samples, + left_margin); case CHAIN_NONE: default: break; @@ -316,8 +349,19 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) ret += fprintf(fp, "\n"); - if (callchain) - hist_entry_callchain__fprintf(fp, self, total_samples); + if (callchain) { + int left_margin = 0; + + if (sort__first_dimension == SORT_COMM) { + se = list_first_entry(&hist_entry__sort_list, typeof(*se), + list); + left_margin = se->width ? *se->width : 0; + left_margin -= thread__comm_len(self->thread); + } + + hist_entry_callchain__fprintf(fp, self, total_samples, + left_margin); + } return ret; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 60ced70..b490354 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -7,7 +7,8 @@ char default_sort_order[] = "comm,dso,symbol"; char *sort_order = default_sort_order; int sort__need_collapse = 0; int sort__has_parent = 0; -int sort_by_sym_first; + +enum sort_type sort__first_dimension; unsigned int dsos__col_width; unsigned int comms__col_width; @@ -266,9 +267,18 @@ int sort_dimension__add(const char *tok) sort__has_parent = 1; } - if (list_empty(&hist_entry__sort_list) && - !strcmp(sd->name, "symbol")) - sort_by_sym_first = true; + if (list_empty(&hist_entry__sort_list)) { + if (!strcmp(sd->name, "pid")) + sort__first_dimension = SORT_PID; + else if (!strcmp(sd->name, "comm")) + sort__first_dimension = SORT_COMM; + else if (!strcmp(sd->name, "dso")) + sort__first_dimension = SORT_DSO; + else if (!strcmp(sd->name, "symbol")) + sort__first_dimension = SORT_SYM; + else if (!strcmp(sd->name, "parent")) + sort__first_dimension = SORT_PARENT; + } list_add_tail(&sd->entry->list, &hist_entry__sort_list); sd->taken = 1; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 24c2b70..333e664 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -39,7 +39,7 @@ extern struct sort_entry sort_parent; extern unsigned int dsos__col_width; extern unsigned int comms__col_width; extern unsigned int threads__col_width; -extern int sort_by_sym_first; +extern enum sort_type sort__first_dimension; struct hist_entry { struct rb_node rb_node; @@ -54,6 +54,14 @@ struct hist_entry { struct rb_root sorted_chain; }; +enum sort_type { + SORT_PID, + SORT_COMM, + SORT_DSO, + SORT_SYM, + SORT_PARENT +}; + /* * configurable sorting bits */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index f53fad7..8cb47f1 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -33,6 +33,17 @@ int thread__set_comm(struct thread *self, const char *comm) return self->comm ? 0 : -ENOMEM; } +int thread__comm_len(struct thread *self) +{ + if (!self->comm_len) { + if (!self->comm) + return 0; + self->comm_len = strlen(self->comm); + } + + return self->comm_len; +} + static size_t thread__fprintf(struct thread *self, FILE *fp) { struct rb_node *nd; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 1abef3b..53addd7 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -12,9 +12,11 @@ struct thread { pid_t pid; char shortname[3]; char *comm; + int comm_len; }; int thread__set_comm(struct thread *self, const char *comm); +int thread__comm_len(struct thread *self); struct thread *threads__findnew(pid_t pid); struct thread *register_idle_thread(void); void thread__insert_map(struct thread *self, struct map *map); -- cgit v0.10.2 From 802da5f2289bbe363acef084805195c11f453c48 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 22 Oct 2009 23:23:24 +0200 Subject: perf tools: Drop asm/types.h wrapper Wrapping the kernel headers is dangerous when it comes to arch headers. Once we wrap asm/types.h, it will also replace the glibc asm/types.h, not only the kernel one. This results in build errors on some machines. Drop this wrapper and do its work from linux/types.h wrapper, also the glibc asm/types.h can already handle most of the type definition it was doing (typedef __u64, __u32, etc...). Todo: Check the others asm/*.h wrappers to prevent from other conflicts. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Anton Blanchard LKML-Reference: <1256246604-17156-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 65e6e52..0a40c29 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -352,7 +352,6 @@ LIB_H += util/include/asm/bitops.h LIB_H += util/include/asm/byteorder.h LIB_H += util/include/asm/swab.h LIB_H += util/include/asm/system.h -LIB_H += util/include/asm/types.h LIB_H += util/include/asm/uaccess.h LIB_H += perf.h LIB_H += util/event.h diff --git a/tools/perf/util/include/asm/bitops.h b/tools/perf/util/include/asm/bitops.h index fbe4d92..58e9817 100644 --- a/tools/perf/util/include/asm/bitops.h +++ b/tools/perf/util/include/asm/bitops.h @@ -1,6 +1,18 @@ +#ifndef _PERF_ASM_BITOPS_H_ +#define _PERF_ASM_BITOPS_H_ + +#include +#include "../../types.h" +#include + +/* CHECKME: Not sure both always match */ +#define BITS_PER_LONG __WORDSIZE + #include "../../../../include/asm-generic/bitops/__fls.h" #include "../../../../include/asm-generic/bitops/fls.h" #include "../../../../include/asm-generic/bitops/fls64.h" #include "../../../../include/asm-generic/bitops/__ffs.h" #include "../../../../include/asm-generic/bitops/ffz.h" #include "../../../../include/asm-generic/bitops/hweight.h" + +#endif diff --git a/tools/perf/util/include/asm/byteorder.h b/tools/perf/util/include/asm/byteorder.h index 39f367cf..b722abe 100644 --- a/tools/perf/util/include/asm/byteorder.h +++ b/tools/perf/util/include/asm/byteorder.h @@ -1,2 +1,2 @@ -#include "../asm/types.h" +#include #include "../../../../include/linux/swab.h" diff --git a/tools/perf/util/include/asm/types.h b/tools/perf/util/include/asm/types.h deleted file mode 100644 index 06703c6..0000000 --- a/tools/perf/util/include/asm/types.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef PERF_ASM_TYPES_H_ -#define PERF_ASM_TYPES_H_ - -#include -#include "../../types.h" -#include - -/* CHECKME: Not sure both always match */ -#define BITS_PER_LONG __WORDSIZE - -typedef u64 __u64; -typedef u32 __u32; -typedef u16 __u16; -typedef u8 __u8; -typedef s64 __s64; - -#endif /* PERF_ASM_TYPES_H_ */ diff --git a/tools/perf/util/include/linux/types.h b/tools/perf/util/include/linux/types.h index 858a38d..196862a 100644 --- a/tools/perf/util/include/linux/types.h +++ b/tools/perf/util/include/linux/types.h @@ -1,6 +1,8 @@ #ifndef _PERF_LINUX_TYPES_H_ #define _PERF_LINUX_TYPES_H_ +#include + #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] -- cgit v0.10.2 From 6beba7adbe092e63dfe8d09fbd1e3ec140474a13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 21 Oct 2009 17:34:06 -0200 Subject: perf tools: Unify debug messages mechanisms We were using eprintf in some places, that looks at a global 'verbose' level, and at other places passing a 'v' parameter to specify the verbosity level, unify it by introducing pr_{err,warning,debug,etc}, just like in the kernel. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256153646-10097-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 99bac6a..6d63c2e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -203,8 +203,7 @@ static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { struct map *map = map__new(&event->mmap, NULL, 0, - sizeof(struct sym_priv), symbol_filter, - verbose); + sizeof(struct sym_priv), symbol_filter); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f0467ff..ac5ddff 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -630,7 +630,7 @@ static int __cmd_record(int argc, const char **argv) param.sched_priority = realtime_prio; if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { - printf("Could not set realtime priority.\n"); + pr_err("Could not set realtime priority.\n"); exit(-1); } } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 72d5842..b3d814b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -689,7 +689,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf("... chain: nr:%Lu\n", chain->nr); if (validate_chain(chain, event) < 0) { - eprintf("call-chain problem with event, skipping it.\n"); + pr_debug("call-chain problem with event, " + "skipping it.\n"); return 0; } @@ -700,7 +701,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } if (thread == NULL) { - eprintf("problem processing %d event, skipping it.\n", + pr_debug("problem processing %d event, skipping it.\n", event->header.type); return -1; } @@ -738,7 +739,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (hist_entry__add(thread, map, sym, ip, chain, level, period)) { - eprintf("problem incrementing symbol count, skipping event\n"); + pr_debug("problem incrementing symbol count, skipping event\n"); return -1; } @@ -750,7 +751,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct map *map = map__new(&event->mmap, cwd, cwdlen, 0, NULL, verbose); + struct map *map = map__new(&event->mmap, cwd, cwdlen, 0, NULL); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 807ca66..9a48d96 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1666,8 +1666,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) (long long)period); if (thread == NULL) { - eprintf("problem processing %d event, skipping it.\n", - event->header.type); + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); return -1; } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 34fad57..0a2f222 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1162,12 +1162,10 @@ more: size = event->header.size; if (!size || process_event(event) < 0) { - - printf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - + pr_warning("%p [%p]: skipping unknown header type: %d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.type); /* * assume we lost track of the stream, check alignment, and * increment a single u64 in the hope to catch on again 'soon'. @@ -1200,7 +1198,8 @@ done: write_svg_file(output_name); - printf("Written %2.1f seconds of trace to %s.\n", (last_time - first_time) / 1000000000.0, output_name); + pr_info("Written %2.1f seconds of trace to %s.\n", + (last_time - first_time) / 1000000000.0, output_name); return rc; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fa20345..4a9fe22 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -809,7 +809,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) static int parse_symbols(void) { if (dsos__load_kernel(vmlinux_name, sizeof(struct sym_entry), - symbol_filter, verbose, 1) <= 0) + symbol_filter, 1) <= 0) return -1; if (dump_symtab) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 4c129ff..e566bbe 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -81,8 +81,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) (long long)period); if (thread == NULL) { - eprintf("problem processing %d event, skipping it.\n", - event->header.type); + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); return -1; } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 3b8380f..b3b7125 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -206,7 +206,7 @@ fill_node(struct callchain_node *node, struct ip_callchain *chain, } node->val_nr = chain->nr - start; if (!node->val_nr) - printf("Warning: empty node in callchain tree\n"); + pr_warning("Warning: empty node in callchain tree\n"); } static void diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index e8ca98f..28d520d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -13,12 +13,12 @@ int verbose = 0; int dump_trace = 0; -int eprintf(const char *fmt, ...) +int eprintf(int level, const char *fmt, ...) { va_list args; int ret = 0; - if (verbose) { + if (verbose >= level) { va_start(args, fmt); ret = vfprintf(stderr, fmt, args); va_end(args); diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 02d1fa1..e8b18a1 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -5,7 +5,8 @@ extern int verbose; extern int dump_trace; -int eprintf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +int eprintf(int level, + const char *fmt, ...) __attribute__((format(printf, 2, 3))); int dump_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2))); void trace_event(event_t *event); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index db59c8b..d972b4b 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -106,8 +106,7 @@ struct symbol; typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, - unsigned int sym_priv_size, symbol_filter_t filter, - int v); + unsigned int sym_priv_size, symbol_filter_t filter); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 622c60e..7d26659 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -93,7 +93,7 @@ static struct perf_trace_event_type *events; void perf_header__push_event(u64 id, const char *name) { if (strlen(name) > MAX_EVENT_NAME) - printf("Event %s will be truncated\n", name); + pr_warning("Event %s will be truncated\n", name); if (!events) { events = malloc(sizeof(struct perf_trace_event_type)); diff --git a/tools/perf/util/include/linux/kernel.h b/tools/perf/util/include/linux/kernel.h index 4b9204d..21c0274 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/perf/util/include/linux/kernel.h @@ -85,4 +85,21 @@ simple_strtoul(const char *nptr, char **endptr, int base) return strtoul(nptr, endptr, base); } +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#define pr_err(fmt, ...) \ + do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#define pr_warning(fmt, ...) \ + do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#define pr_info(fmt, ...) \ + do { fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__); } while (0) +#define pr_debug(fmt, ...) \ + eprintf(1, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_debugN(n, fmt, ...) \ + eprintf(n, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__) +#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__) + #endif diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 55079c0..c1c5568 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -21,8 +21,7 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) } struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, - unsigned int sym_priv_size, symbol_filter_t filter, - int v) + unsigned int sym_priv_size, symbol_filter_t filter) { struct map *self = malloc(sizeof(*self)); @@ -58,16 +57,16 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, goto out_delete; if (new_dso) { - int nr = dso__load(self->dso, self, filter, v); + int nr = dso__load(self->dso, self, filter); if (nr < 0) - eprintf("Failed to open %s, continuing " - "without symbols\n", - self->dso->long_name); + pr_warning("Failed to open %s, continuing " + "without symbols\n", + self->dso->long_name); else if (nr == 0) - eprintf("No symbols found in %s, maybe " - "install a debug package?\n", - self->dso->long_name); + pr_warning("No symbols found in %s, maybe " + "install a debug package?\n", + self->dso->long_name); } if (self->dso == vdso || anon) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0a48984..8f0208c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -76,7 +76,7 @@ static void kernel_maps__fixup_end(void) } static struct symbol *symbol__new(u64 start, u64 len, const char *name, - unsigned int priv_size, int v) + unsigned int priv_size) { size_t namelen = strlen(name) + 1; struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); @@ -91,8 +91,7 @@ static struct symbol *symbol__new(u64 start, u64 len, const char *name, self->start = start; self->end = len ? start + len - 1 : start; - if (v > 2) - printf("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end); + pr_debug3("%s: %s %#Lx-%#Lx\n", __func__, name, start, self->end); memcpy(self->name, name, namelen); @@ -209,7 +208,7 @@ size_t dso__fprintf(struct dso *self, FILE *fp) * so that we can in the next step set the symbol ->end address and then * call kernel_maps__split_kallsyms. */ -static int kernel_maps__load_all_kallsyms(int v) +static int kernel_maps__load_all_kallsyms(void) { char *line = NULL; size_t n; @@ -252,7 +251,7 @@ static int kernel_maps__load_all_kallsyms(int v) * Will fix up the end later, when we have all symbols sorted. */ sym = symbol__new(start, 0, symbol_name, - kernel_map->dso->sym_priv_size, v); + kernel_map->dso->sym_priv_size); if (sym == NULL) goto out_delete_line; @@ -300,8 +299,8 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) if (strcmp(map->dso->name, module)) { map = kernel_maps__find_by_dso_name(module); if (!map) { - fputs("/proc/{kallsyms,modules} " - "inconsistency!\n", stderr); + pr_err("/proc/{kallsyms,modules} " + "inconsistency!\n"); return -1; } } @@ -351,10 +350,9 @@ delete_symbol: } -static int kernel_maps__load_kallsyms(symbol_filter_t filter, - int use_modules, int v) +static int kernel_maps__load_kallsyms(symbol_filter_t filter, int use_modules) { - if (kernel_maps__load_all_kallsyms(v)) + if (kernel_maps__load_all_kallsyms()) return -1; dso__fixup_sym_end(kernel_map->dso); @@ -362,9 +360,9 @@ static int kernel_maps__load_kallsyms(symbol_filter_t filter, return kernel_maps__split_kallsyms(filter, use_modules); } -static size_t kernel_maps__fprintf(FILE *fp, int v) +static size_t kernel_maps__fprintf(FILE *fp) { - size_t printed = fprintf(stderr, "Kernel maps:\n"); + size_t printed = fprintf(fp, "Kernel maps:\n"); struct rb_node *nd; for (nd = rb_first(&kernel_maps); nd; nd = rb_next(nd)) { @@ -372,17 +370,17 @@ static size_t kernel_maps__fprintf(FILE *fp, int v) printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); - if (v > 1) { + if (verbose > 1) { printed += dso__fprintf(pos->dso, fp); printed += fprintf(fp, "--\n"); } } - return printed + fprintf(stderr, "END kernel maps\n"); + return printed + fprintf(fp, "END kernel maps\n"); } static int dso__load_perf_map(struct dso *self, struct map *map, - symbol_filter_t filter, int v) + symbol_filter_t filter) { char *line = NULL; size_t n; @@ -420,7 +418,7 @@ static int dso__load_perf_map(struct dso *self, struct map *map, continue; sym = symbol__new(start, size, line + len, - self->sym_priv_size, v); + self->sym_priv_size); if (sym == NULL) goto out_delete_line; @@ -534,7 +532,7 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). */ -static int dso__synthesize_plt_symbols(struct dso *self, int v) +static int dso__synthesize_plt_symbols(struct dso *self) { uint32_t nr_rel_entries, idx; GElf_Sym sym; @@ -618,7 +616,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, v); + sympltname, self->sym_priv_size); if (!f) goto out_elf_end; @@ -636,7 +634,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, int v) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, v); + sympltname, self->sym_priv_size); if (!f) goto out_elf_end; @@ -654,14 +652,14 @@ out_close: if (err == 0) return nr; out: - fprintf(stderr, "%s: problems reading %s PLT info.\n", - __func__, self->long_name); + pr_warning("%s: problems reading %s PLT info.\n", + __func__, self->long_name); return 0; } static int dso__load_sym(struct dso *self, struct map *map, const char *name, int fd, symbol_filter_t filter, int kernel, - int kmodule, int v) + int kmodule) { struct map *curr_map = map; struct dso *curr_dso = self; @@ -680,15 +678,12 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (elf == NULL) { - if (v) - fprintf(stderr, "%s: cannot read %s ELF file.\n", - __func__, name); + pr_err("%s: cannot read %s ELF file.\n", __func__, name); goto out_close; } if (gelf_getehdr(elf, &ehdr) == NULL) { - if (v) - fprintf(stderr, "%s: cannot get elf header.\n", __func__); + pr_err("%s: cannot get elf header.\n", __func__); goto out_elf_end; } @@ -794,10 +789,9 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, } if (curr_dso->adjust_symbols) { - if (v > 2) - printf("adjusting symbol: st_value: %Lx sh_addr: %Lx sh_offset: %Lx\n", - (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); - + pr_debug2("adjusting symbol: st_value: %Lx sh_addr: " + "%Lx sh_offset: %Lx\n", (u64)sym.st_value, + (u64)shdr.sh_addr, (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; } /* @@ -810,7 +804,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, elf_name = demangled; new_symbol: f = symbol__new(sym.st_value, sym.st_size, elf_name, - curr_dso->sym_priv_size, v); + curr_dso->sym_priv_size); free(demangled); if (!f) goto out_elf_end; @@ -837,7 +831,7 @@ out_close: #define BUILD_ID_SIZE 128 -static char *dso__read_build_id(struct dso *self, int v) +static char *dso__read_build_id(struct dso *self) { int i; GElf_Ehdr ehdr; @@ -854,15 +848,13 @@ static char *dso__read_build_id(struct dso *self, int v) elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); if (elf == NULL) { - if (v) - fprintf(stderr, "%s: cannot read %s ELF file.\n", - __func__, self->long_name); + pr_err("%s: cannot read %s ELF file.\n", __func__, + self->long_name); goto out_close; } if (gelf_getehdr(elf, &ehdr) == NULL) { - if (v) - fprintf(stderr, "%s: cannot get elf header.\n", __func__); + pr_err("%s: cannot get elf header.\n", __func__); goto out_elf_end; } @@ -884,8 +876,7 @@ static char *dso__read_build_id(struct dso *self, int v) ++raw; bid += 2; } - if (v >= 2) - printf("%s(%s): %s\n", __func__, self->long_name, build_id); + pr_debug2("%s(%s): %s\n", __func__, self->long_name, build_id); out_elf_end: elf_end(elf); out_close: @@ -911,8 +902,7 @@ char dso__symtab_origin(const struct dso *self) return origin[self->origin]; } -int dso__load(struct dso *self, struct map *map, - symbol_filter_t filter, int v) +int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) { int size = PATH_MAX; char *name = malloc(size), *build_id = NULL; @@ -925,7 +915,7 @@ int dso__load(struct dso *self, struct map *map, self->adjust_symbols = 0; if (strncmp(self->name, "/tmp/perf-", 10) == 0) { - ret = dso__load_perf_map(self, map, filter, v); + ret = dso__load_perf_map(self, map, filter); self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT : DSO__ORIG_NOT_FOUND; return ret; @@ -946,7 +936,7 @@ more: self->long_name); break; case DSO__ORIG_BUILDID: - build_id = dso__read_build_id(self, v); + build_id = dso__read_build_id(self); if (build_id != NULL) { snprintf(name, size, "/usr/lib/debug/.build-id/%.2s/%s.debug", @@ -967,7 +957,7 @@ more: fd = open(name, O_RDONLY); } while (fd < 0); - ret = dso__load_sym(self, map, name, fd, filter, 0, 0, v); + ret = dso__load_sym(self, map, name, fd, filter, 0, 0); close(fd); /* @@ -977,7 +967,7 @@ more: goto more; if (ret > 0) { - int nr_plt = dso__synthesize_plt_symbols(self, v); + int nr_plt = dso__synthesize_plt_symbols(self); if (nr_plt > 0) ret += nr_plt; } @@ -1025,34 +1015,29 @@ struct map *kernel_maps__find_by_dso_name(const char *name) } static int dso__load_module_sym(struct dso *self, struct map *map, - symbol_filter_t filter, int v) + symbol_filter_t filter) { int err = 0, fd = open(self->long_name, O_RDONLY); if (fd < 0) { - if (v) - fprintf(stderr, "%s: cannot open %s\n", - __func__, self->long_name); + pr_err("%s: cannot open %s\n", __func__, self->long_name); return err; } - err = dso__load_sym(self, map, self->long_name, fd, filter, 0, 1, v); + err = dso__load_sym(self, map, self->long_name, fd, filter, 0, 1); close(fd); return err; } -static int dsos__load_modules_sym_dir(char *dirname, - symbol_filter_t filter, int v) +static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) { struct dirent *dent; int nr_symbols = 0, err; DIR *dir = opendir(dirname); if (!dir) { - if (v) - fprintf(stderr, "%s: cannot open %s dir\n", __func__, - dirname); + pr_err("%s: cannot open %s dir\n", __func__, dirname); return -1; } @@ -1066,7 +1051,7 @@ static int dsos__load_modules_sym_dir(char *dirname, snprintf(path, sizeof(path), "%s/%s", dirname, dent->d_name); - err = dsos__load_modules_sym_dir(path, filter, v); + err = dsos__load_modules_sym_dir(path, filter); if (err < 0) goto failure; } else { @@ -1092,7 +1077,7 @@ static int dsos__load_modules_sym_dir(char *dirname, if (map->dso->long_name == NULL) goto failure; - err = dso__load_module_sym(map->dso, map, filter, v); + err = dso__load_module_sym(map->dso, map, filter); if (err < 0) goto failure; last = rb_last(&map->dso->syms); @@ -1119,7 +1104,7 @@ failure: return -1; } -static int dsos__load_modules_sym(symbol_filter_t filter, int v) +static int dsos__load_modules_sym(symbol_filter_t filter) { struct utsname uts; char modules_path[PATH_MAX]; @@ -1130,7 +1115,7 @@ static int dsos__load_modules_sym(symbol_filter_t filter, int v) snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", uts.release); - return dsos__load_modules_sym_dir(modules_path, filter, v); + return dsos__load_modules_sym_dir(modules_path, filter); } /* @@ -1225,15 +1210,14 @@ out_failure: } static int dso__load_vmlinux(struct dso *self, struct map *map, - const char *vmlinux, - symbol_filter_t filter, int v) + const char *vmlinux, symbol_filter_t filter) { int err, fd = open(vmlinux, O_RDONLY); if (fd < 0) return -1; - err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0, v); + err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0); close(fd); @@ -1241,7 +1225,7 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, } int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, - symbol_filter_t filter, int v, int use_modules) + symbol_filter_t filter, int use_modules) { int err = -1; struct dso *dso = dso__new(vmlinux, sym_priv_size); @@ -1257,26 +1241,26 @@ int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; if (use_modules && dsos__load_modules(sym_priv_size) < 0) { - fprintf(stderr, "Failed to load list of modules in use! " - "Continuing...\n"); + pr_warning("Failed to load list of modules in use! " + "Continuing...\n"); use_modules = 0; } if (vmlinux) { - err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter, v); + err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter); if (err > 0 && use_modules) { - int syms = dsos__load_modules_sym(filter, v); + int syms = dsos__load_modules_sym(filter); if (syms < 0) - fprintf(stderr, "Failed to read module symbols!" - " Continuing...\n"); + pr_warning("Failed to read module symbols!" + " Continuing...\n"); else err += syms; } } if (err <= 0) - err = kernel_maps__load_kallsyms(filter, use_modules, v); + err = kernel_maps__load_kallsyms(filter, use_modules); if (err > 0) { struct rb_node *node = rb_first(&dso->syms); @@ -1296,8 +1280,8 @@ int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, kernel_maps__fixup_end(); dsos__add(dso); - if (v > 0) - kernel_maps__fprintf(stderr, v); + if (verbose) + kernel_maps__fprintf(stderr); } return err; @@ -1355,8 +1339,8 @@ void dsos__fprintf(FILE *fp) int load_kernel(unsigned int sym_priv_size, symbol_filter_t filter) { - if (dsos__load_kernel(vmlinux_name, sym_priv_size, - filter, verbose, modules) <= 0) + if (dsos__load_kernel(vmlinux_name, sym_priv_size, filter, + modules) <= 0) return -1; vdso = dso__new("[vdso]", 0); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index c2a777d..77b7b3e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -63,11 +63,10 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, - symbol_filter_t filter, int verbose, int modules); + symbol_filter_t filter, int modules); struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size, bool *is_new); -int dso__load(struct dso *self, struct map *map, - symbol_filter_t filter, int v); +int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); void dsos__fprintf(FILE *fp); size_t dso__fprintf(struct dso *self, FILE *fp); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 8cb47f1..0f6d78c 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -127,9 +127,9 @@ static void thread__remove_overlappings(struct thread *self, struct map *map) continue; if (verbose >= 2) { - printf("overlapping maps:\n"); - map__fprintf(map, stdout); - map__fprintf(pos, stdout); + fputs("overlapping maps:\n", stderr); + map__fprintf(map, stderr); + map__fprintf(pos, stderr); } rb_erase(&pos->rb_node, &self->maps); -- cgit v0.10.2 From b7cb10e790fbd145296e771f789273a875c15719 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 21 Oct 2009 17:34:06 -0200 Subject: perf probe: Print debug messages using pr_*() Use the new pr_{err,warning,debug,etc} printout methods, just like in the kernel. Signed-off-by: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256153646-10097-1-git-send-email-acme@redhat.com> [ Split this patch out, to keep perf/probes separate. ] Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index b5ad86a..dcb406c 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -78,7 +78,7 @@ static int parse_probepoint(const struct option *opt __used, if (!str) /* The end of probe points */ return 0; - eprintf("probe-definition(%d): %s\n", session.nr_probe, str); + pr_debug("probe-definition(%d): %s\n", session.nr_probe, str); if (++session.nr_probe == MAX_PROBES) semantic_error("Too many probes"); @@ -103,7 +103,7 @@ static int parse_probepoint(const struct option *opt __used, die("strndup"); if (++argc == MAX_PROBE_ARGS) semantic_error("Too many arguments"); - eprintf("argv[%d]=%s\n", argc, argv[argc - 1]); + pr_debug("argv[%d]=%s\n", argc, argv[argc - 1]); } } while (*str != '\0'); if (argc < 2) @@ -133,7 +133,7 @@ static int parse_probepoint(const struct option *opt __used, pp->line = atoi(ptr); if (!pp->file || !pp->line) semantic_error("Failed to parse line."); - eprintf("file:%s line:%d\n", pp->file, pp->line); + pr_debug("file:%s line:%d\n", pp->file, pp->line); } else { /* Function name */ ptr = strchr(arg, '+'); @@ -150,8 +150,8 @@ static int parse_probepoint(const struct option *opt __used, pp->file = strdup(ptr); } pp->function = strdup(arg); - eprintf("symbol:%s file:%s offset:%d\n", - pp->function, pp->file, pp->offset); + pr_debug("symbol:%s file:%s offset:%d\n", + pp->function, pp->file, pp->offset); } free(argv[1]); if (pp->file) @@ -175,7 +175,7 @@ static int parse_probepoint(const struct option *opt __used, session.need_dwarf = 1; } - eprintf("%d arguments\n", pp->nr_args); + pr_debug("%d arguments\n", pp->nr_args); return 0; } @@ -188,7 +188,7 @@ static int open_default_vmlinux(void) ret = uname(&uts); if (ret) { - eprintf("uname() failed.\n"); + pr_debug("uname() failed.\n"); return -errno; } session.release = uts.release; @@ -196,12 +196,12 @@ static int open_default_vmlinux(void) ret = snprintf(fname, MAX_PATH_LEN, default_search_path[i], session.release); if (ret >= MAX_PATH_LEN || ret < 0) { - eprintf("Filename(%d,%s) is too long.\n", i, + pr_debug("Filename(%d,%s) is too long.\n", i, uts.release); errno = E2BIG; return -E2BIG; } - eprintf("try to open %s\n", fname); + pr_debug("try to open %s\n", fname); fd = open(fname, O_RDONLY); if (fd >= 0) break; @@ -341,7 +341,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ret = find_probepoint(fd, pp); if (ret <= 0) die("No probe point found.\n"); - eprintf("probe event %s found\n", session.events[j]); + pr_debug("probe event %s found\n", session.events[j]); } close(fd); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index be997ab..54e7071 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -136,7 +136,7 @@ static Dwarf_Unsigned die_get_fileno(Dwarf_Die cu_die, const char *fname) dwarf_dealloc(__dw_debug, srcs, DW_DLA_LIST); } if (found) - eprintf("found fno: %d\n", (int)found); + pr_debug("found fno: %d\n", (int)found); return found; } @@ -442,7 +442,7 @@ static void find_variable(Dwarf_Die sp_die, struct probe_finder *pf) return ; } - eprintf("Searching '%s' variable in context.\n", pf->var); + pr_debug("Searching '%s' variable in context.\n", pf->var); /* Search child die for local variables and parameters. */ ret = search_die_from_children(sp_die, variable_callback, pf); if (!ret) @@ -552,7 +552,7 @@ static void find_by_line(Dwarf_Die cu_die, struct probe_finder *pf) ret = dwarf_lineaddr(lines[i], &addr, &__dw_error); DIE_IF(ret != DW_DLV_OK); - eprintf("Probe point found: 0x%llx\n", addr); + pr_debug("Probe point found: 0x%llx\n", addr); pf->addr = addr; /* Search a real subprogram including this line, */ ret = search_die_from_children(cu_die, probeaddr_callback, pf); @@ -583,8 +583,8 @@ static int probefunc_callback(struct die_link *dlink, void *data) &pf->inl_offs, &__dw_error); DIE_IF(ret != DW_DLV_OK); - eprintf("inline definition offset %lld\n", - pf->inl_offs); + pr_debug("inline definition offset %lld\n", + pf->inl_offs); return 0; } /* Get probe address */ @@ -599,7 +599,7 @@ static int probefunc_callback(struct die_link *dlink, void *data) /* Get probe address */ pf->addr = die_get_entrypc(dlink->die); pf->addr += pp->offset; - eprintf("found inline addr: 0x%llx\n", pf->addr); + pr_debug("found inline addr: 0x%llx\n", pf->addr); /* Inlined function. Get a real subprogram */ for (lk = dlink->parent; lk != NULL; lk = lk->parent) { tag = 0; -- cgit v0.10.2 From 40b1f4e5113eafc5e84f2ba86822df66087fcb25 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Thu, 22 Oct 2009 14:39:28 +1100 Subject: irq: trivial: Fix typo in comment for #endif The comment suggests this #endif is CONFIG_X86 but it's really CONFIG_TRACE_IRQFLAGS_SUPPORT Signed-off-by: Michael Neuling Cc: michael@ellerman.id.au LKML-Reference: <18191.1256182768@neuling.org> Signed-off-by: Ingo Molnar diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index b02a3f1..006bf45 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -124,6 +124,6 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) -#endif /* CONFIG_X86 */ +#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #endif -- cgit v0.10.2 From 72f279b256d520e321a850880d094bc0bcbf45d6 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Thu, 22 Oct 2009 19:19:34 +0800 Subject: generic-ipi: Fix misleading smp_call_function*() description After commit:8969a5ede0f9e17da4b943712429aef2c9bcd82b "generic-ipi: remove kmalloc()", wait = 0 can be guaranteed. Signed-off-by: Sheng Yang Cc: Peter Zijlstra Cc: Jens Axboe Cc: Nick Piggin LKML-Reference: <1256210374-25354-1-git-send-email-sheng@linux.intel.com> Signed-off-by: Ingo Molnar diff --git a/kernel/smp.c b/kernel/smp.c index c9d1c78..8bd618f 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -265,9 +265,7 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data); * @info: An arbitrary pointer to pass to the function. * @wait: If true, wait until function has completed on other CPUs. * - * Returns 0 on success, else a negative status code. Note that @wait - * will be implicitly turned on in case of allocation failures, since - * we fall back to on-stack allocation. + * Returns 0 on success, else a negative status code. */ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, int wait) @@ -355,9 +353,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, * @wait: If true, wait (atomically) until function has completed * on other CPUs. * - * If @wait is true, then returns once @func has returned. Note that @wait - * will be implicitly turned on in case of allocation failures, since - * we fall back to on-stack allocation. + * If @wait is true, then returns once @func has returned. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. Preemption @@ -443,8 +439,7 @@ EXPORT_SYMBOL(smp_call_function_many); * Returns 0. * * If @wait is true, then returns once @func has returned; otherwise - * it returns just before the target cpu calls @func. In case of allocation - * failure, @wait will be implicitly turned on. + * it returns just before the target cpu calls @func. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. -- cgit v0.10.2 From 5c828713358cb9df8aa174371edcbbb62203a490 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 23 Oct 2009 14:58:11 +0200 Subject: ratelimit: Make suppressed output messages more useful Today I got: [39648.224782] Registered led device: iwl-phy0::TX [40676.545099] __ratelimit: 246 callbacks suppressed [40676.545103] abcdef[23675]: segfault at 0 ... as you can see the ratelimit message contains a function prefix. Since this is always __ratelimit, this wont help much. This patch changes __ratelimit and printk_ratelimit to print the function name that calls ratelimit. This will pinpoint the responsible function, as long as not several different places call ratelimit with the same ratelimit state at the same time. In that case we catch only one random function that calls ratelimit after the wait period. Signed-off-by: Christian Borntraeger Cc: Dave Young Cc: Linus Torvalds CC: Andrew Morton LKML-Reference: <200910231458.11832.borntraeger@de.ibm.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3305f33..21d0d82 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -240,7 +240,8 @@ asmlinkage int vprintk(const char *fmt, va_list args) asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int printk_ratelimit(void); +extern int __printk_ratelimit(const char *func); +#define printk_ratelimit() __printk_ratelimit(__func__) extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msec); diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index 187bc16..668cf1b 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -25,6 +25,7 @@ struct ratelimit_state { .burst = burst_init, \ } -extern int __ratelimit(struct ratelimit_state *rs); +extern int ___ratelimit(struct ratelimit_state *rs, const char *func); +#define __ratelimit(state) ___ratelimit(state, __func__) #endif /* _LINUX_RATELIMIT_H */ diff --git a/kernel/printk.c b/kernel/printk.c index b997c89..8283dbe 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -1364,11 +1364,11 @@ late_initcall(disable_boot_consoles); */ DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); -int printk_ratelimit(void) +int __printk_ratelimit(const char *func) { - return __ratelimit(&printk_ratelimit_state); + return ___ratelimit(&printk_ratelimit_state, func); } -EXPORT_SYMBOL(printk_ratelimit); +EXPORT_SYMBOL(__printk_ratelimit); /** * printk_timed_ratelimit - caller-controlled printk ratelimiting diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 5551731..09f5ce1 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -20,7 +20,7 @@ * This enforces a rate limit: not more than @rs->ratelimit_burst callbacks * in every @rs->ratelimit_jiffies */ -int __ratelimit(struct ratelimit_state *rs) +int ___ratelimit(struct ratelimit_state *rs, const char *func) { unsigned long flags; int ret; @@ -43,7 +43,7 @@ int __ratelimit(struct ratelimit_state *rs) if (time_is_before_jiffies(rs->begin + rs->interval)) { if (rs->missed) printk(KERN_WARNING "%s: %d callbacks suppressed\n", - __func__, rs->missed); + func, rs->missed); rs->begin = 0; rs->printed = 0; rs->missed = 0; @@ -59,4 +59,4 @@ int __ratelimit(struct ratelimit_state *rs) return ret; } -EXPORT_SYMBOL(__ratelimit); +EXPORT_SYMBOL(___ratelimit); -- cgit v0.10.2 From 6e8e16c7bc298d7887584c3d027e05db3e86eed9 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 22 Oct 2009 15:38:26 -0400 Subject: SELinux: add .gitignore files for dynamic classes The SELinux dynamic class work in c6d3aaa4e35c71a32a86ececacd4eea7ecfc316c creates a number of dynamic header files and scripts. Add .gitignore files so git doesn't complain about these. Signed-off-by: Eric Paris Acked-by: Stephen D. Smalley Signed-off-by: James Morris diff --git a/Documentation/dontdiff b/Documentation/dontdiff index e1efc40..e151b2a 100644 --- a/Documentation/dontdiff +++ b/Documentation/dontdiff @@ -65,6 +65,7 @@ aicdb.h* asm-offsets.h asm_offsets.h autoconf.h* +av_permissions.h bbootsect bin2c binkernel.spec @@ -95,12 +96,14 @@ docproc elf2ecoff elfconfig.h* fixdep +flask.h fore200e_mkfirm fore200e_pca_fw.c* gconf gen-devlist gen_crc32table gen_init_cpio +genheaders genksyms *_gray256.c ihex2fw diff --git a/scripts/selinux/genheaders/.gitignore b/scripts/selinux/genheaders/.gitignore new file mode 100644 index 0000000..4c0b646 --- /dev/null +++ b/scripts/selinux/genheaders/.gitignore @@ -0,0 +1 @@ +genheaders diff --git a/security/selinux/.gitignore b/security/selinux/.gitignore new file mode 100644 index 0000000..2e5040a --- /dev/null +++ b/security/selinux/.gitignore @@ -0,0 +1,2 @@ +av_permissions.h +flask.h -- cgit v0.10.2 From bb015f0c85362aa767f8f00f50a40d85e489414f Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 19 Oct 2009 11:43:32 +0200 Subject: pcmcia: drop already defined PCI_IDs Out of 10 PCI_IDs found in the PCMCIA subsystem, only two were not defined in pci_ids.h. Move them and drop the duplicates. Successfully build-tested. Signed-off-by: Wolfram Sang Cc: Jesse Barnes Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cirrus.h b/drivers/pcmcia/cirrus.h index ecd4fc7..446a457 100644 --- a/drivers/pcmcia/cirrus.h +++ b/drivers/pcmcia/cirrus.h @@ -30,16 +30,6 @@ #ifndef _LINUX_CIRRUS_H #define _LINUX_CIRRUS_H -#ifndef PCI_VENDOR_ID_CIRRUS -#define PCI_VENDOR_ID_CIRRUS 0x1013 -#endif -#ifndef PCI_DEVICE_ID_CIRRUS_6729 -#define PCI_DEVICE_ID_CIRRUS_6729 0x1100 -#endif -#ifndef PCI_DEVICE_ID_CIRRUS_6832 -#define PCI_DEVICE_ID_CIRRUS_6832 0x1110 -#endif - #define PD67_MISC_CTL_1 0x16 /* Misc control 1 */ #define PD67_FIFO_CTL 0x17 /* FIFO control */ #define PD67_MISC_CTL_2 0x1E /* Misc control 2 */ diff --git a/drivers/pcmcia/o2micro.h b/drivers/pcmcia/o2micro.h index 72188c4..624442f 100644 --- a/drivers/pcmcia/o2micro.h +++ b/drivers/pcmcia/o2micro.h @@ -30,28 +30,6 @@ #ifndef _LINUX_O2MICRO_H #define _LINUX_O2MICRO_H -#ifndef PCI_VENDOR_ID_O2 -#define PCI_VENDOR_ID_O2 0x1217 -#endif -#ifndef PCI_DEVICE_ID_O2_6729 -#define PCI_DEVICE_ID_O2_6729 0x6729 -#endif -#ifndef PCI_DEVICE_ID_O2_6730 -#define PCI_DEVICE_ID_O2_6730 0x673a -#endif -#ifndef PCI_DEVICE_ID_O2_6832 -#define PCI_DEVICE_ID_O2_6832 0x6832 -#endif -#ifndef PCI_DEVICE_ID_O2_6836 -#define PCI_DEVICE_ID_O2_6836 0x6836 -#endif -#ifndef PCI_DEVICE_ID_O2_6812 -#define PCI_DEVICE_ID_O2_6812 0x6872 -#endif -#ifndef PCI_DEVICE_ID_O2_6933 -#define PCI_DEVICE_ID_O2_6933 0x6933 -#endif - /* Additional PCI configuration registers */ #define O2_MUX_CONTROL 0x90 /* 32 bit */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f490e7a..857cc34 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1632,6 +1632,8 @@ #define PCI_DEVICE_ID_O2_6730 0x673a #define PCI_DEVICE_ID_O2_6832 0x6832 #define PCI_DEVICE_ID_O2_6836 0x6836 +#define PCI_DEVICE_ID_O2_6812 0x6872 +#define PCI_DEVICE_ID_O2_6933 0x6933 #define PCI_VENDOR_ID_3DFX 0x121a #define PCI_DEVICE_ID_3DFX_VOODOO 0x0001 -- cgit v0.10.2 From 6ae3b84d979308671bf6f6a2123c258a8603d61c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 18:14:32 +0200 Subject: serial_cs: use pcmcia_loop_config() and pre-determined values As the PCMCIA core already determines the multifunction count, the ConfigBase address and the Present value, we can use them directly instead of parsing the CIS again. By making use of pcmcia_loop_config(), we can further remove the remaining call to pcmcia_get_first_tuple() and friends. CC: linux-serial@vger.kernel.org CC: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index ff4617e..7bf02cf 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -426,21 +426,6 @@ static int setup_serial(struct pcmcia_device *handle, struct serial_info * info, /*====================================================================*/ -static int -first_tuple(struct pcmcia_device *handle, tuple_t * tuple, cisparse_t * parse) -{ - int i; - i = pcmcia_get_first_tuple(handle, tuple); - if (i != 0) - return i; - i = pcmcia_get_tuple_data(handle, tuple); - if (i != 0) - return i; - return pcmcia_parse_tuple(tuple, parse); -} - -/*====================================================================*/ - static int simple_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cf, cistpl_cftable_entry_t *dflt, @@ -665,6 +650,25 @@ static int multi_config(struct pcmcia_device *link) return 0; } +static int serial_check_for_multi(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cf, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + struct serial_info *info = p_dev->priv; + + if ((cf->io.nwin == 1) && (cf->io.win[0].len % 8 == 0)) + info->multi = cf->io.win[0].len >> 3; + + if ((cf->io.nwin == 2) && (cf->io.win[0].len == 8) && + (cf->io.win[1].len == 8)) + info->multi = 2; + + return 0; /* break */ +} + + /*====================================================================== serial_config() is scheduled to run after a CARD_INSERTION event @@ -676,46 +680,14 @@ static int multi_config(struct pcmcia_device *link) static int serial_config(struct pcmcia_device * link) { struct serial_info *info = link->priv; - struct serial_cfg_mem *cfg_mem; - tuple_t *tuple; - u_char *buf; - cisparse_t *parse; - cistpl_cftable_entry_t *cf; - int i, last_ret, last_fn; + int i; DEBUG(0, "serial_config(0x%p)\n", link); - cfg_mem = kmalloc(sizeof(struct serial_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - goto failed; - - tuple = &cfg_mem->tuple; - parse = &cfg_mem->parse; - cf = &parse->cftable_entry; - buf = cfg_mem->buf; - - tuple->TupleData = (cisdata_t *) buf; - tuple->TupleOffset = 0; - tuple->TupleDataMax = 255; - tuple->Attributes = 0; - - /* Get configuration register information */ - tuple->DesiredTuple = CISTPL_CONFIG; - last_ret = first_tuple(link, tuple, parse); - if (last_ret != 0) { - last_fn = ParseTuple; - goto cs_failed; - } - link->conf.ConfigBase = parse->config.base; - link->conf.Present = parse->config.rmask[0]; - /* Is this a compliant multifunction card? */ - tuple->DesiredTuple = CISTPL_LONGLINK_MFC; - tuple->Attributes = TUPLE_RETURN_COMMON | TUPLE_RETURN_LINK; - info->multi = (first_tuple(link, tuple, parse) == 0); + info->multi = (link->socket->functions > 1); /* Is this a multiport card? */ - tuple->DesiredTuple = CISTPL_MANFID; info->manfid = link->manf_id; info->prodid = link->card_id; @@ -730,20 +702,11 @@ static int serial_config(struct pcmcia_device * link) /* Another check for dual-serial cards: look for either serial or multifunction cards that ask for appropriate IO port ranges */ - tuple->DesiredTuple = CISTPL_FUNCID; if ((info->multi == 0) && (link->has_func_id) && ((link->func_id == CISTPL_FUNCID_MULTI) || - (link->func_id == CISTPL_FUNCID_SERIAL))) { - tuple->DesiredTuple = CISTPL_CFTABLE_ENTRY; - if (first_tuple(link, tuple, parse) == 0) { - if ((cf->io.nwin == 1) && (cf->io.win[0].len % 8 == 0)) - info->multi = cf->io.win[0].len >> 3; - if ((cf->io.nwin == 2) && (cf->io.win[0].len == 8) && - (cf->io.win[1].len == 8)) - info->multi = 2; - } - } + (link->func_id == CISTPL_FUNCID_SERIAL))) + pcmcia_loop_config(link, serial_check_for_multi, info); /* * Apply any multi-port quirk. @@ -768,14 +731,10 @@ static int serial_config(struct pcmcia_device * link) goto failed; link->dev_node = &info->node[0]; - kfree(cfg_mem); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: serial_remove(link); - kfree(cfg_mem); return -ENODEV; } -- cgit v0.10.2 From 6c21a7fb492bf7e2c4985937082ce58ddeca84bd Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Thu, 22 Oct 2009 17:30:13 -0400 Subject: LSM: imbed ima calls in the security hooks Based on discussions on LKML and LSM, where there are consecutive security_ and ima_ calls in the vfs layer, move the ima_ calls to the existing security_ hooks. Signed-off-by: Mimi Zohar Signed-off-by: James Morris diff --git a/fs/exec.c b/fs/exec.c index d49be6b..d164342 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include @@ -1211,9 +1210,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) retval = security_bprm_check(bprm); if (retval) return retval; - retval = ima_bprm_check(bprm); - if (retval) - return retval; /* kernel module loader fixup */ /* so we don't try to load run modprobe in kernel space. */ diff --git a/fs/file_table.c b/fs/file_table.c index 8eb4404..4bef4c0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -280,7 +279,6 @@ void __fput(struct file *file) if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); - ima_file_free(file); if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) cdev_put(inode->i_cdev); fops_put(file->f_op); diff --git a/fs/inode.c b/fs/inode.c index 4d8e3be..06c1f02 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -157,11 +156,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) if (security_inode_alloc(inode)) goto out; - - /* allocate and initialize an i_integrity */ - if (ima_inode_alloc(inode)) - goto out_free_security; - spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); @@ -201,9 +195,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode) #endif return 0; - -out_free_security: - security_inode_free(inode); out: return -ENOMEM; } @@ -235,7 +226,6 @@ static struct inode *alloc_inode(struct super_block *sb) void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); - ima_inode_free(inode); security_inode_free(inode); fsnotify_inode_delete(inode); #ifdef CONFIG_FS_POSIX_ACL diff --git a/mm/mmap.c b/mm/mmap.c index 73f5e4b..292ddc3 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -1061,9 +1060,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, error = security_file_mmap(file, reqprot, prot, flags, addr, 0); if (error) return error; - error = ima_file_mmap(file, prot); - if (error) - return error; return mmap_region(file, addr, len, flags, vm_flags, pgoff); } diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 53d9764..3d7846d 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -3,6 +3,7 @@ config IMA bool "Integrity Measurement Architecture(IMA)" depends on ACPI + depends on SECURITY select SECURITYFS select CRYPTO select CRYPTO_HMAC diff --git a/security/security.c b/security/security.c index 2797573..684d5ee 100644 --- a/security/security.c +++ b/security/security.c @@ -16,6 +16,7 @@ #include #include #include +#include /* Boot-time LSM user choice */ static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1]; @@ -235,7 +236,12 @@ int security_bprm_set_creds(struct linux_binprm *bprm) int security_bprm_check(struct linux_binprm *bprm) { - return security_ops->bprm_check_security(bprm); + int ret; + + ret = security_ops->bprm_check_security(bprm); + if (ret) + return ret; + return ima_bprm_check(bprm); } void security_bprm_committing_creds(struct linux_binprm *bprm) @@ -352,12 +358,21 @@ EXPORT_SYMBOL(security_sb_parse_opts_str); int security_inode_alloc(struct inode *inode) { + int ret; + inode->i_security = NULL; - return security_ops->inode_alloc_security(inode); + ret = security_ops->inode_alloc_security(inode); + if (ret) + return ret; + ret = ima_inode_alloc(inode); + if (ret) + security_inode_free(inode); + return ret; } void security_inode_free(struct inode *inode) { + ima_inode_free(inode); security_ops->inode_free_security(inode); } @@ -648,6 +663,8 @@ int security_file_alloc(struct file *file) void security_file_free(struct file *file) { security_ops->file_free_security(file); + if (file->f_dentry) + ima_file_free(file); } int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -659,7 +676,12 @@ int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags, unsigned long addr, unsigned long addr_only) { - return security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); + int ret; + + ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); + if (ret) + return ret; + return ima_file_mmap(file, prot); } int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, -- cgit v0.10.2 From ce0e7b28fb75cb003cfc8d0238613aaf1c55e797 Mon Sep 17 00:00:00 2001 From: Ryota Ozaki Date: Sat, 24 Oct 2009 01:20:10 +0900 Subject: sched, cpuacct: Fix niced guest time accounting CPU time of a guest is always accounted in 'user' time without concern for the nice value of its counterpart process although the guest is scheduled under the nice value. This patch fixes the defect and accounts cpu time of a niced guest in 'nice' time as same as a niced process. And also the patch adds 'guest_nice' to cpuacct. The value provides niced guest cpu time which is like 'nice' to 'user'. The original discussions can be found here: http://www.mail-archive.com/kvm@vger.kernel.org/msg23982.html http://www.mail-archive.com/kvm@vger.kernel.org/msg23860.html Signed-off-by: Ryota Ozaki Acked-by: Avi Kivity Cc: Peter Zijlstra LKML-Reference: <1256314810-7897-1-git-send-email-ozaki.ryota@gmail.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 2c48f94..4af0018 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right: - irq: servicing interrupts - softirq: servicing softirqs - steal: involuntary wait -- guest: running a guest +- guest: running a normal guest +- guest_nice: running a niced guest The "intr" line gives counts of interrupts serviced since boot time, for each of the possible system interrupts. The first column is the total of all diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 7cc726c..b9b7aad 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v) int i, j; unsigned long jif; cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; - cputime64_t guest; + cputime64_t guest, guest_nice; u64 sum = 0; u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; @@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v) user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; - guest = cputime64_zero; + guest = guest_nice = cputime64_zero; getboottime(&boottime); jif = boottime.tv_sec; @@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); + guest_nice = cputime64_add(guest_nice, + kstat_cpu(i).cpustat.guest_nice); for_each_irq_nr(j) { sum += kstat_irqs_cpu(j, i); } @@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v) } sum += arch_irq_stat(); - seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " + "%llu\n", (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), (unsigned long long)cputime64_to_clock_t(system), @@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest)); + (unsigned long long)cputime64_to_clock_t(guest), + (unsigned long long)cputime64_to_clock_t(guest_nice)); for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ @@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v) softirq = kstat_cpu(i).cpustat.softirq; steal = kstat_cpu(i).cpustat.steal; guest = kstat_cpu(i).cpustat.guest; + guest_nice = kstat_cpu(i).cpustat.guest_nice; seq_printf(p, - "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " + "%llu\n", i, (unsigned long long)cputime64_to_clock_t(user), (unsigned long long)cputime64_to_clock_t(nice), @@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(irq), (unsigned long long)cputime64_to_clock_t(softirq), (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest)); + (unsigned long long)cputime64_to_clock_t(guest), + (unsigned long long)cputime64_to_clock_t(guest_nice)); } seq_printf(p, "intr %llu", (unsigned long long)sum); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 348fa88..c059044 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -25,6 +25,7 @@ struct cpu_usage_stat { cputime64_t iowait; cputime64_t steal; cputime64_t guest; + cputime64_t guest_nice; }; struct kernel_stat { diff --git a/kernel/sched.c b/kernel/sched.c index e520581..67be4d0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5017,8 +5017,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, p->gtime = cputime_add(p->gtime, cputime); /* Add guest time to cpustat. */ - cpustat->user = cputime64_add(cpustat->user, tmp); - cpustat->guest = cputime64_add(cpustat->guest, tmp); + if (TASK_NICE(p) > 0) { + cpustat->nice = cputime64_add(cpustat->nice, tmp); + cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); + } else { + cpustat->user = cputime64_add(cpustat->user, tmp); + cpustat->guest = cputime64_add(cpustat->guest, tmp); + } } /* -- cgit v0.10.2 From 9b1d82fa1611706fa7ee1505f290160a18caf95d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:50 -0700 Subject: rcu: "Tiny RCU", The Bloatwatch Edition This patch is a version of RCU designed for !SMP provided for a small-footprint RCU implementation. In particular, the implementation of synchronize_rcu() is extremely lightweight and high performance. It passes rcutorture testing in each of the four relevant configurations (combinations of NO_HZ and PREEMPT) on x86. This saves about 1K bytes compared to old Classic RCU (which is no longer in mainline), and more than three kilobytes compared to Hierarchical RCU (updated to 2.6.30): CONFIG_TREE_RCU: text data bss dec filename 183 4 0 187 kernel/rcupdate.o 2783 520 36 3339 kernel/rcutree.o 3526 Total (vs 4565 for v7) CONFIG_TREE_PREEMPT_RCU: text data bss dec filename 263 4 0 267 kernel/rcupdate.o 4594 776 52 5422 kernel/rcutree.o 5689 Total (6155 for v7) CONFIG_TINY_RCU: text data bss dec filename 96 4 0 100 kernel/rcupdate.o 734 24 0 758 kernel/rcutiny.o 858 Total (vs 848 for v7) The above is for x86. Your mileage may vary on other platforms. Further compression is possible, but is being procrastinated. Changes from v7 (http://lkml.org/lkml/2009/10/9/388) o Apply Lai Jiangshan's review comments (aside from might_sleep() in synchronize_sched(), which is covered by SMP builds). o Fix up expedited primitives. Changes from v6 (http://lkml.org/lkml/2009/9/23/293). o Forward ported to put it into the 2.6.33 stream. o Added lockdep support. o Make lightweight rcu_barrier. Changes from v5 (http://lkml.org/lkml/2009/6/23/12). o Ported to latest pre-2.6.32 merge window kernel. - Renamed rcu_qsctr_inc() to rcu_sched_qs(). - Renamed rcu_bh_qsctr_inc() to rcu_bh_qs(). - Provided trivial rcu_cpu_notify(). - Provided trivial exit_rcu(). - Provided trivial rcu_needs_cpu(). - Fixed up the rcu_*_enter/exit() functions in linux/hardirq.h. o Removed the dependence on EMBEDDED, with a view to making TINY_RCU default for !SMP at some time in the future. o Added (trivial) support for expedited grace periods. Changes from v4 (http://lkml.org/lkml/2009/5/2/91) include: o Squeeze the size down a bit further by removing the ->completed field from struct rcu_ctrlblk. o This permits synchronize_rcu() to become the empty function. Previous concerns about rcutorture were unfounded, as rcutorture correctly handles a constant value from rcu_batches_completed() and rcu_batches_completed_bh(). Changes from v3 (http://lkml.org/lkml/2009/3/29/221) include: o Changed rcu_batches_completed(), rcu_batches_completed_bh() rcu_enter_nohz(), rcu_exit_nohz(), rcu_nmi_enter(), and rcu_nmi_exit(), to be static inlines, as suggested by David Howells. Doing this saves about 100 bytes from rcutiny.o. (The numbers between v3 and this v4 of the patch are not directly comparable, since they are against different versions of Linux.) Changes from v2 (http://lkml.org/lkml/2009/2/3/333) include: o Fix whitespace issues. o Change short-circuit "||" operator to instead be "+" in order to fix performance bug noted by "kraai" on LWN. (http://lwn.net/Articles/324348/) Changes from v1 (http://lkml.org/lkml/2009/1/13/440) include: o This version depends on EMBEDDED as well as !SMP, as suggested by Ingo. o Updated rcu_needs_cpu() to unconditionally return zero, permitting the CPU to enter dynticks-idle mode at any time. This works because callbacks can be invoked upon entry to dynticks-idle mode. o Paul is now OK with this being included, based on a poll at the Kernel Miniconf at linux.conf.au, where about ten people said that they cared about saving 900 bytes on single-CPU systems. o Applies to both mainline and tip/core/rcu. Signed-off-by: Paul E. McKenney Acked-by: David Howells Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226351355-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 6d527ee..d5b3876 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,10 +139,34 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) +#if defined(CONFIG_TINY_RCU) +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +static inline void rcu_irq_enter(void) +{ + rcu_exit_nohz(); +} + +static inline void rcu_irq_exit(void) +{ + rcu_enter_nohz(); +} + +static inline void rcu_nmi_enter(void) +{ +} + +static inline void rcu_nmi_exit(void) +{ +} + +#else extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); extern void rcu_nmi_enter(void); extern void rcu_nmi_exit(void); +#endif #else # define rcu_irq_enter() do { } while (0) # define rcu_irq_exit() do { } while (0) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3ebd0b7..6dd71fa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -68,11 +68,17 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); extern void rcu_scheduler_starting(void); +#ifndef CONFIG_TINY_RCU extern int rcu_needs_cpu(int cpu); +#else +static inline int rcu_needs_cpu(int cpu) { return 0; } +#endif extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include +#elif CONFIG_TINY_RCU +#include #else #error "Unknown RCU implementation specified to kernel configuration" #endif diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h new file mode 100644 index 0000000..891073c --- /dev/null +++ b/include/linux/rcutiny.h @@ -0,0 +1,97 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Paul E. McKenney + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#ifndef __LINUX_TINY_H +#define __LINUX_TINY_H + +#include + +void rcu_sched_qs(int cpu); +void rcu_bh_qs(int cpu); + +#define __rcu_read_lock() preempt_disable() +#define __rcu_read_unlock() preempt_enable() +#define __rcu_read_lock_bh() local_bh_disable() +#define __rcu_read_unlock_bh() local_bh_enable() +#define call_rcu_sched call_rcu + +#define rcu_init_sched() do { } while (0) +extern void rcu_check_callbacks(int cpu, int user); +extern void __rcu_init(void); + +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) +{ + return 0; +} + +/* + * Return the number of bottom-half grace periods. + */ +static inline long rcu_batches_completed_bh(void) +{ + return 0; +} + +extern int rcu_expedited_torture_stats(char *page); + +static inline void synchronize_rcu_expedited(void) +{ + synchronize_sched(); +} + +static inline void synchronize_rcu_bh_expedited(void) +{ + synchronize_sched(); +} + +struct notifier_block; +extern int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu); + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ + +static inline void exit_rcu(void) +{ +} + +#endif /* __LINUX_RCUTINY_H */ diff --git a/init/Kconfig b/init/Kconfig index 09c5c64..d367bf6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -334,6 +334,15 @@ config TREE_PREEMPT_RCU is also required. It also scales down nicely to smaller systems. +config TINY_RCU + bool "UP-only small-memory-footprint RCU" + depends on !SMP + help + This option selects the RCU implementation that is + designed for UP systems from which real-time response + is not required. This option greatly reduces the + memory footprint of RCU. + endchoice config RCU_TRACE diff --git a/kernel/Makefile b/kernel/Makefile index b8d4cd8..e9a0e6e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o +obj-$(CONFIG_TINY_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4001833..7625f20 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -67,6 +67,8 @@ void wakeme_after_rcu(struct rcu_head *head) complete(&rcu->completion); } +#ifndef CONFIG_TINY_RCU + #ifdef CONFIG_TREE_PREEMPT_RCU /** @@ -157,6 +159,8 @@ void synchronize_rcu_bh(void) } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); +#endif /* #ifndef CONFIG_TINY_RCU */ + static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, unsigned long action, void *hcpu) { diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c new file mode 100644 index 0000000..0b54efd --- /dev/null +++ b/kernel/rcutiny.c @@ -0,0 +1,294 @@ +/* + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Paul E. McKenney + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Global control variables for rcupdate callback mechanism. */ +struct rcu_ctrlblk { + struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ + struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ + struct rcu_head **curtail; /* ->next pointer of last CB. */ +}; + +/* Definition for rcupdate control block. */ +static struct rcu_ctrlblk rcu_ctrlblk = { + .rcucblist = NULL, + .donetail = &rcu_ctrlblk.rcucblist, + .curtail = &rcu_ctrlblk.rcucblist, +}; +static struct rcu_ctrlblk rcu_bh_ctrlblk = { + .rcucblist = NULL, + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, +}; + +#ifdef CONFIG_NO_HZ + +static long rcu_dynticks_nesting = 1; + +/* + * Enter dynticks-idle mode, which is an extended quiescent state + * if we have fully entered that mode (i.e., if the new value of + * dynticks_nesting is zero). + */ +void rcu_enter_nohz(void) +{ + if (--rcu_dynticks_nesting == 0) + rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ +} + +/* + * Exit dynticks-idle mode, so that we are no longer in an extended + * quiescent state. + */ +void rcu_exit_nohz(void) +{ + rcu_dynticks_nesting++; +} + +#endif /* #ifdef CONFIG_NO_HZ */ + +/* + * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc(). + * Also disable irqs to avoid confusion due to interrupt handlers invoking + * call_rcu(). + */ +static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + + local_irq_save(flags); + if (rcp->rcucblist != NULL && + rcp->donetail != rcp->curtail) { + rcp->donetail = rcp->curtail; + local_irq_restore(flags); + return 1; + } + local_irq_restore(flags); + return 0; +} + +/* + * Record an rcu quiescent state. And an rcu_bh quiescent state while we + * are at it, given that any rcu quiescent state is also an rcu_bh + * quiescent state. Use "+" instead of "||" to defeat short circuiting. + */ +void rcu_sched_qs(int cpu) +{ + if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Record an rcu_bh quiescent state. + */ +void rcu_bh_qs(int cpu) +{ + if (rcu_qsctr_help(&rcu_bh_ctrlblk)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Check to see if the scheduling-clock interrupt came from an extended + * quiescent state, and, if so, tell RCU about it. + */ +void rcu_check_callbacks(int cpu, int user) +{ + if (user || + (idle_cpu(cpu) && + !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) + rcu_sched_qs(cpu); + else if (!in_softirq()) + rcu_bh_qs(cpu); +} + +/* + * Helper function for rcu_process_callbacks() that operates on the + * specified rcu_ctrlkblk structure. + */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + struct rcu_head *next, *list; + + /* If no RCU callbacks ready to invoke, just return. */ + if (&rcp->rcucblist == rcp->donetail) + return; + + /* Move the ready-to-invoke callbacks to a local list. */ + local_irq_save(flags); + list = rcp->rcucblist; + rcp->rcucblist = *rcp->donetail; + *rcp->donetail = NULL; + if (rcp->curtail == rcp->donetail) + rcp->curtail = &rcp->rcucblist; + rcp->donetail = &rcp->rcucblist; + local_irq_restore(flags); + + /* Invoke the callbacks on the local list. */ + while (list) { + next = list->next; + prefetch(next); + list->func(list); + list = next; + } +} + +/* + * Invoke any callbacks whose grace period has completed. + */ +static void rcu_process_callbacks(struct softirq_action *unused) +{ + __rcu_process_callbacks(&rcu_ctrlblk); + __rcu_process_callbacks(&rcu_bh_ctrlblk); +} + +/* + * Null function to handle CPU being onlined. Longer term, we want to + * make TINY_RCU avoid using rcupdate.c, but later... + */ +int rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + return NOTIFY_OK; +} + +/* + * Wait for a grace period to elapse. But it is illegal to invoke + * synchronize_sched() from within an RCU read-side critical section. + * Therefore, any legal call to synchronize_sched() is a quiescent + * state, and so on a UP system, synchronize_sched() need do nothing. + * Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the + * benefits of doing might_sleep() to reduce latency.) + * + * Cool, huh? (Due to Josh Triplett.) + * + * But we want to make this a static inline later. + */ +void synchronize_sched(void) +{ + cond_resched(); +} +EXPORT_SYMBOL_GPL(synchronize_sched); + +void synchronize_rcu_bh(void) +{ + synchronize_sched(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + +/* + * Helper function for call_rcu() and call_rcu_bh(). + */ +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_ctrlblk *rcp) +{ + unsigned long flags; + + head->func = func; + head->next = NULL; + local_irq_save(flags); + *rcp->curtail = head; + rcp->curtail = &head->next; + local_irq_restore(flags); +} + +/* + * Post an RCU callback to be invoked after the end of an RCU grace + * period. But since we have but one CPU, that would be after any + * quiescent state. + */ +void call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_ctrlblk); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Post an RCU bottom-half callback to be invoked after any subsequent + * quiescent state. + */ +void call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_bh_ctrlblk); +} +EXPORT_SYMBOL_GPL(call_rcu_bh); + +void rcu_barrier(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + +void rcu_barrier_bh(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier_bh); + +void rcu_barrier_sched(void) +{ + struct rcu_synchronize rcu; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(rcu_barrier_sched); + +void __rcu_init(void) +{ + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); +} -- cgit v0.10.2 From 0cd397d33608ae6c97d2ee6c8c43462b419b7e26 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:51 -0700 Subject: rcu: Add synchronize_srcu_expedited() This patch creates a synchronize_srcu_expedited() that uses synchronize_sched_expedited() where synchronize_srcu() uses synchronize_sched(). The synchronize_srcu() and synchronize_srcu_expedited() functions become one-liners that pass synchronize_sched() or synchronize_sched_expedited(), repectively, to a new __synchronize_srcu() function. While in the file, move the EXPORT_SYMBOL_GPL()s to immediately follow the corresponding functions. Requested-by: Avi Kivity Tested-by: Marcelo Tosatti Signed-off-by: Paul E. McKenney Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: avi@redhat.com LKML-Reference: <12565226354038-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/include/linux/srcu.h b/include/linux/srcu.h index aca0eee..4765d97 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -48,6 +48,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp); int srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); +void synchronize_srcu_expedited(struct srcu_struct *sp); long srcu_batches_completed(struct srcu_struct *sp); #endif diff --git a/kernel/srcu.c b/kernel/srcu.c index b0aeeaf..818d7d9 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c @@ -49,6 +49,7 @@ int init_srcu_struct(struct srcu_struct *sp) sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); return (sp->per_cpu_ref ? 0 : -ENOMEM); } +EXPORT_SYMBOL_GPL(init_srcu_struct); /* * srcu_readers_active_idx -- returns approximate number of readers @@ -97,6 +98,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp) free_percpu(sp->per_cpu_ref); sp->per_cpu_ref = NULL; } +EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /** * srcu_read_lock - register a new reader for an SRCU-protected structure. @@ -118,6 +120,7 @@ int srcu_read_lock(struct srcu_struct *sp) preempt_enable(); return idx; } +EXPORT_SYMBOL_GPL(srcu_read_lock); /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. @@ -136,22 +139,12 @@ void srcu_read_unlock(struct srcu_struct *sp, int idx) per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; preempt_enable(); } +EXPORT_SYMBOL_GPL(srcu_read_unlock); -/** - * synchronize_srcu - wait for prior SRCU read-side critical-section completion - * @sp: srcu_struct with which to synchronize. - * - * Flip the completed counter, and wait for the old count to drain to zero. - * As with classic RCU, the updater must use some separate means of - * synchronizing concurrent updates. Can block; must be called from - * process context. - * - * Note that it is illegal to call synchornize_srcu() from the corresponding - * SRCU read-side critical section; doing so will result in deadlock. - * However, it is perfectly legal to call synchronize_srcu() on one - * srcu_struct from some other srcu_struct's read-side critical section. +/* + * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -void synchronize_srcu(struct srcu_struct *sp) +void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) { int idx; @@ -173,7 +166,7 @@ void synchronize_srcu(struct srcu_struct *sp) return; } - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * The preceding synchronize_sched() ensures that any CPU that @@ -190,7 +183,7 @@ void synchronize_srcu(struct srcu_struct *sp) idx = sp->completed & 0x1; sp->completed++; - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * At this point, because of the preceding synchronize_sched(), @@ -203,7 +196,7 @@ void synchronize_srcu(struct srcu_struct *sp) while (srcu_readers_active_idx(sp, idx)) schedule_timeout_interruptible(1); - synchronize_sched(); /* Force memory barrier on all CPUs. */ + sync_func(); /* Force memory barrier on all CPUs. */ /* * The preceding synchronize_sched() forces all srcu_read_unlock() @@ -237,6 +230,47 @@ void synchronize_srcu(struct srcu_struct *sp) } /** + * synchronize_srcu - wait for prior SRCU read-side critical-section completion + * @sp: srcu_struct with which to synchronize. + * + * Flip the completed counter, and wait for the old count to drain to zero. + * As with classic RCU, the updater must use some separate means of + * synchronizing concurrent updates. Can block; must be called from + * process context. + * + * Note that it is illegal to call synchronize_srcu() from the corresponding + * SRCU read-side critical section; doing so will result in deadlock. + * However, it is perfectly legal to call synchronize_srcu() on one + * srcu_struct from some other srcu_struct's read-side critical section. + */ +void synchronize_srcu(struct srcu_struct *sp) +{ + __synchronize_srcu(sp, synchronize_sched); +} +EXPORT_SYMBOL_GPL(synchronize_srcu); + +/** + * synchronize_srcu_expedited - like synchronize_srcu, but less patient + * @sp: srcu_struct with which to synchronize. + * + * Flip the completed counter, and wait for the old count to drain to zero. + * As with classic RCU, the updater must use some separate means of + * synchronizing concurrent updates. Can block; must be called from + * process context. + * + * Note that it is illegal to call synchronize_srcu_expedited() + * from the corresponding SRCU read-side critical section; doing so + * will result in deadlock. However, it is perfectly legal to call + * synchronize_srcu_expedited() on one srcu_struct from some other + * srcu_struct's read-side critical section. + */ +void synchronize_srcu_expedited(struct srcu_struct *sp) +{ + __synchronize_srcu(sp, synchronize_sched_expedited); +} +EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); + +/** * srcu_batches_completed - return batches completed. * @sp: srcu_struct on which to report batch completion. * @@ -248,10 +282,4 @@ long srcu_batches_completed(struct srcu_struct *sp) { return sp->completed; } - -EXPORT_SYMBOL_GPL(init_srcu_struct); -EXPORT_SYMBOL_GPL(cleanup_srcu_struct); -EXPORT_SYMBOL_GPL(srcu_read_lock); -EXPORT_SYMBOL_GPL(srcu_read_unlock); -EXPORT_SYMBOL_GPL(synchronize_srcu); EXPORT_SYMBOL_GPL(srcu_batches_completed); -- cgit v0.10.2 From 804bb8370522a569bd3a732b9de5fbd55e26f155 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:52 -0700 Subject: rcu: Add synchronize_srcu_expedited() to the rcutorture test suite Adds the "srcu_expedited" torture type, and also renames sched_ops_sync to sched_sync_ops for consistency while we are in this file. Signed-off-by: Paul E. McKenney Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226353636-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 697c0a0..14480e8 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -547,6 +547,25 @@ static struct rcu_torture_ops srcu_ops = { .name = "srcu" }; +static void srcu_torture_synchronize_expedited(void) +{ + synchronize_srcu_expedited(&srcu_ctl); +} + +static struct rcu_torture_ops srcu_expedited_ops = { + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .completed = srcu_torture_completed, + .deferred_free = rcu_sync_torture_deferred_free, + .sync = srcu_torture_synchronize_expedited, + .cb_barrier = NULL, + .stats = srcu_torture_stats, + .name = "srcu_expedited" +}; + /* * Definitions for sched torture testing. */ @@ -592,7 +611,7 @@ static struct rcu_torture_ops sched_ops = { .name = "sched" }; -static struct rcu_torture_ops sched_ops_sync = { +static struct rcu_torture_ops sched_sync_ops = { .init = rcu_sync_torture_init, .cleanup = NULL, .readlock = sched_torture_read_lock, @@ -1098,8 +1117,8 @@ rcu_torture_init(void) int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, - &sched_expedited_ops, - &srcu_ops, &sched_ops, &sched_ops_sync, }; + &srcu_ops, &srcu_expedited_ops, + &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; mutex_lock(&fullstop_mutex); -- cgit v0.10.2 From 64179861cb801eac4f00c79f39a29ea5ac9470d7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:53 -0700 Subject: rcu: Add synchronize_srcu_expedited() to the documentation Signed-off-by: Paul E. McKenney Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226354176-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e41a7fe..d542ca2 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -830,7 +830,7 @@ sched: Critical sections Grace period Barrier SRCU: Critical sections Grace period Barrier srcu_read_lock synchronize_srcu N/A - srcu_read_unlock + srcu_read_unlock synchronize_srcu_expedited SRCU: Initialization/cleanup init_srcu_struct -- cgit v0.10.2 From cf886c44ec418a01b2c52493465accb81acbf930 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 25 Oct 2009 19:03:54 -0700 Subject: rcu: Improve rcutorture diagnostics when bad torture_type specified Make rcutorture list the available torture_type values when it doesn't like the one specified. Signed-off-by: Paul E. McKenney Acked-by: Josh Triplett Reviewed-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226351868-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 14480e8..3dd0ca2 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -1129,8 +1129,12 @@ rcu_torture_init(void) break; } if (i == ARRAY_SIZE(torture_ops)) { - printk(KERN_ALERT "rcutorture: invalid torture type: \"%s\"\n", + printk(KERN_ALERT "rcu-torture: invalid torture type: \"%s\"\n", torture_type); + printk(KERN_ALERT "rcu-torture types:"); + for (i = 0; i < ARRAY_SIZE(torture_ops); i++) + printk(KERN_ALERT " %s", torture_ops[i]->name); + printk(KERN_ALERT "\n"); mutex_unlock(&fullstop_mutex); return -EINVAL; } -- cgit v0.10.2 From 4ce5b90340879ce93d169b7b523c2cbbe7c45843 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Oct 2009 07:55:55 +0100 Subject: rcu: Do tiny cleanups in rcutiny No change in functionality - just straighten out a few small stylistic details. Cc: Paul E. McKenney Cc: David Howells Cc: Josh Triplett Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: avi@redhat.com Cc: mtosatti@redhat.com LKML-Reference: <12565226351355-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 891073c..2c1fe83 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -20,9 +20,8 @@ * Author: Paul E. McKenney * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU + * Documentation/RCU */ - #ifndef __LINUX_TINY_H #define __LINUX_TINY_H @@ -70,8 +69,7 @@ static inline void synchronize_rcu_bh_expedited(void) } struct notifier_block; -extern int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); +extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); #ifdef CONFIG_NO_HZ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 0b54efd..b33ec3aa 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -20,22 +20,21 @@ * Author: Paul E. McKenney * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU + * Documentation/RCU */ - -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include #include -#include +#include +#include +#include #include +#include +#include +#include #include +#include /* Global control variables for rcupdate callback mechanism. */ struct rcu_ctrlblk { @@ -46,14 +45,13 @@ struct rcu_ctrlblk { /* Definition for rcupdate control block. */ static struct rcu_ctrlblk rcu_ctrlblk = { - .rcucblist = NULL, - .donetail = &rcu_ctrlblk.rcucblist, - .curtail = &rcu_ctrlblk.rcucblist, + .donetail = &rcu_ctrlblk.rcucblist, + .curtail = &rcu_ctrlblk.rcucblist, }; + static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .rcucblist = NULL, - .donetail = &rcu_bh_ctrlblk.rcucblist, - .curtail = &rcu_bh_ctrlblk.rcucblist, + .donetail = &rcu_bh_ctrlblk.rcucblist, + .curtail = &rcu_bh_ctrlblk.rcucblist, }; #ifdef CONFIG_NO_HZ @@ -84,8 +82,8 @@ void rcu_exit_nohz(void) /* * Helper function for rcu_qsctr_inc() and rcu_bh_qsctr_inc(). - * Also disable irqs to avoid confusion due to interrupt handlers invoking - * call_rcu(). + * Also disable irqs to avoid confusion due to interrupt handlers + * invoking call_rcu(). */ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) { @@ -99,6 +97,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) return 1; } local_irq_restore(flags); + return 0; } @@ -143,8 +142,8 @@ void rcu_check_callbacks(int cpu, int user) */ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) { - unsigned long flags; struct rcu_head *next, *list; + unsigned long flags; /* If no RCU callbacks ready to invoke, just return. */ if (&rcp->rcucblist == rcp->donetail) @@ -182,8 +181,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) * Null function to handle CPU being onlined. Longer term, we want to * make TINY_RCU avoid using rcupdate.c, but later... */ -int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { return NOTIFY_OK; } @@ -223,6 +221,7 @@ static void __call_rcu(struct rcu_head *head, head->func = func; head->next = NULL; + local_irq_save(flags); *rcp->curtail = head; rcp->curtail = &head->next; @@ -234,8 +233,7 @@ static void __call_rcu(struct rcu_head *head, * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { __call_rcu(head, func, &rcu_ctrlblk); } @@ -245,8 +243,7 @@ EXPORT_SYMBOL_GPL(call_rcu); * Post an RCU bottom-half callback to be invoked after any subsequent * quiescent state. */ -void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) +void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { __call_rcu(head, func, &rcu_bh_ctrlblk); } -- cgit v0.10.2 From fcd14b3203b538dca04a2b065c774c0b57863eec Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Mon, 26 Oct 2009 21:32:06 +1300 Subject: perf tools, Alpha: Add Alpha support to perf.h For the perf tool the patch implements an Alpha specific section in the perf.h header file. Signed-off-by: Michael Cree Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1256545926-6972-1-git-send-email-mcree@orcon.net.nz> Signed-off-by: Ingo Molnar diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 8cc4623..216bdb2 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -47,6 +47,12 @@ #define cpu_relax() asm volatile("":::"memory") #endif +#ifdef __alpha__ +#include "../../arch/alpha/include/asm/unistd.h" +#define rmb() asm volatile("mb" ::: "memory") +#define cpu_relax() asm volatile("" ::: "memory") +#endif + #include #include #include -- cgit v0.10.2 From 88b91c7ca49bc8600cf1106eb891d08c1965b9ce Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 26 Oct 2009 10:24:31 -0700 Subject: rcu: Simplify creating of lockdep class for root rcu_node Use lockdep_set_class() to simplify the code and to avoid any additional overhead in the !LOCKDEP case. Also move the definition of rcu_root_class into kernel/rcutree.c, as suggested by Lai Jiangshan. Signed-off-by: Peter Zijlstra Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1256577871443-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ddbf111..055f1a9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -51,6 +51,8 @@ /* Data structures. */ +static struct lock_class_key rcu_root_class; + #define RCU_STATE_INITIALIZER(name) { \ .level = { &name.node[0] }, \ .levelcnt = { \ @@ -1666,8 +1668,7 @@ static void __init rcu_init_one(struct rcu_state *rsp) cpustride *= rsp->levelspread[i]; rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { - if (rnp != rcu_get_root(rsp)) - spin_lock_init(&rnp->lock); + spin_lock_init(&rnp->lock); rnp->gpnum = 0; rnp->qsmask = 0; rnp->qsmaskinit = 0; @@ -1690,7 +1691,7 @@ static void __init rcu_init_one(struct rcu_state *rsp) INIT_LIST_HEAD(&rnp->blocked_tasks[1]); } } - spin_lock_init(&rcu_get_root(rsp)->lock); + lockdep_set_class(&rcu_get_root(rsp)->lock, &rcu_root_class); } /* -- cgit v0.10.2 From 7f3bedcc93f935631d2363f23de1cc80f04fdf3e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Oct 2009 19:23:17 -0200 Subject: perf record: Fix race where process can disappear while reading its /proc/pid/tasks Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256592199-9608-1-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ac5ddff..9e1638c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -206,6 +206,7 @@ static pid_t pid_synthesize_comm_event(pid_t pid, int full) fp = fopen(filename, "r"); if (fp == NULL) { +out_race: /* * We raced with a task exiting - just return: */ @@ -247,6 +248,9 @@ static pid_t pid_synthesize_comm_event(pid_t pid, int full) snprintf(filename, sizeof(filename), "/proc/%d/task", pid); tasks = opendir(filename); + if (tasks == NULL) + goto out_race; + while (!readdir_r(tasks, &dirent, &next) && next) { char *end; pid = strtol(dirent.d_name, &end, 10); -- cgit v0.10.2 From 234fbbf508c58c5084292b11b242377553897459 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Oct 2009 19:23:18 -0200 Subject: perf tools: Generalize event synthesizing routines Because we will need it in 'perf top' to support userspace symbols for existing threads. Now we pass a callback that will receive the synthesized event and then write it to the output file in 'perf record' and in the upcoming patch for 'perf top' we will just immediatelly create the in memory representation of threads and maps. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256592199-9608-2-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0a40c29..9f4488d 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -380,6 +380,7 @@ LIB_OBJS += util/alias.o LIB_OBJS += util/config.o LIB_OBJS += util/ctype.o LIB_OBJS += util/environment.o +LIB_OBJS += util/event.o LIB_OBJS += util/exec_cmd.o LIB_OBJS += util/help.o LIB_OBJS += util/levenshtein.o diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9e1638c..4a73d89 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -109,6 +109,12 @@ static void write_output(void *buf, size_t size) } } +static int process_synthesized_event(event_t *event) +{ + write_output(event, event->header.size); + return 0; +} + static void mmap_read(struct mmap_data *md) { unsigned int head = mmap_read_head(md); @@ -191,172 +197,6 @@ static void sig_atexit(void) kill(getpid(), signr); } -static pid_t pid_synthesize_comm_event(pid_t pid, int full) -{ - struct comm_event comm_ev; - char filename[PATH_MAX]; - char bf[BUFSIZ]; - FILE *fp; - size_t size = 0; - DIR *tasks; - struct dirent dirent, *next; - pid_t tgid = 0; - - snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - - fp = fopen(filename, "r"); - if (fp == NULL) { -out_race: - /* - * We raced with a task exiting - just return: - */ - if (verbose) - fprintf(stderr, "couldn't open %s\n", filename); - return 0; - } - - memset(&comm_ev, 0, sizeof(comm_ev)); - while (!comm_ev.comm[0] || !comm_ev.pid) { - if (fgets(bf, sizeof(bf), fp) == NULL) - goto out_failure; - - if (memcmp(bf, "Name:", 5) == 0) { - char *name = bf + 5; - while (*name && isspace(*name)) - ++name; - size = strlen(name) - 1; - memcpy(comm_ev.comm, name, size++); - } else if (memcmp(bf, "Tgid:", 5) == 0) { - char *tgids = bf + 5; - while (*tgids && isspace(*tgids)) - ++tgids; - tgid = comm_ev.pid = atoi(tgids); - } - } - - comm_ev.header.type = PERF_RECORD_COMM; - size = ALIGN(size, sizeof(u64)); - comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); - - if (!full) { - comm_ev.tid = pid; - - write_output(&comm_ev, comm_ev.header.size); - goto out_fclose; - } - - snprintf(filename, sizeof(filename), "/proc/%d/task", pid); - - tasks = opendir(filename); - if (tasks == NULL) - goto out_race; - - while (!readdir_r(tasks, &dirent, &next) && next) { - char *end; - pid = strtol(dirent.d_name, &end, 10); - if (*end) - continue; - - comm_ev.tid = pid; - - write_output(&comm_ev, comm_ev.header.size); - } - closedir(tasks); - -out_fclose: - fclose(fp); - return tgid; - -out_failure: - fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", - filename); - exit(EXIT_FAILURE); -} - -static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid) -{ - char filename[PATH_MAX]; - FILE *fp; - - snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); - - fp = fopen(filename, "r"); - if (fp == NULL) { - /* - * We raced with a task exiting - just return: - */ - if (verbose) - fprintf(stderr, "couldn't open %s\n", filename); - return; - } - while (1) { - char bf[BUFSIZ], *pbf = bf; - struct mmap_event mmap_ev = { - .header = { .type = PERF_RECORD_MMAP }, - }; - int n; - size_t size; - if (fgets(bf, sizeof(bf), fp) == NULL) - break; - - /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = hex2u64(pbf, &mmap_ev.start); - if (n < 0) - continue; - pbf += n + 1; - n = hex2u64(pbf, &mmap_ev.len); - if (n < 0) - continue; - pbf += n + 3; - if (*pbf == 'x') { /* vm_exec */ - char *execname = strchr(bf, '/'); - - /* Catch VDSO */ - if (execname == NULL) - execname = strstr(bf, "[vdso]"); - - if (execname == NULL) - continue; - - size = strlen(execname); - execname[size - 1] = '\0'; /* Remove \n */ - memcpy(mmap_ev.filename, execname, size); - size = ALIGN(size, sizeof(u64)); - mmap_ev.len -= mmap_ev.start; - mmap_ev.header.size = (sizeof(mmap_ev) - - (sizeof(mmap_ev.filename) - size)); - mmap_ev.pid = tgid; - mmap_ev.tid = pid; - - write_output(&mmap_ev, mmap_ev.header.size); - } - } - - fclose(fp); -} - -static void synthesize_all(void) -{ - DIR *proc; - struct dirent dirent, *next; - - proc = opendir("/proc"); - - while (!readdir_r(proc, &dirent, &next) && next) { - char *end; - pid_t pid, tgid; - - pid = strtol(dirent.d_name, &end, 10); - if (*end) /* only interested in proper numerical dirents */ - continue; - - tgid = pid_synthesize_comm_event(pid, 1); - pid_synthesize_mmap_samples(pid, tgid); - } - - closedir(proc); -} - static int group_fd; static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) @@ -608,11 +448,10 @@ static int __cmd_record(int argc, const char **argv) if (file_new) perf_header__write(header, output); - if (!system_wide) { - pid_t tgid = pid_synthesize_comm_event(pid, 0); - pid_synthesize_mmap_samples(pid, tgid); - } else - synthesize_all(); + if (!system_wide) + event__synthesize_thread(pid, process_synthesized_event); + else + event__synthesize_threads(process_synthesized_event); if (target_pid == -1 && argc) { pid = fork(); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c new file mode 100644 index 0000000..1dae7e3 --- /dev/null +++ b/tools/perf/util/event.c @@ -0,0 +1,177 @@ +#include +#include "event.h" +#include "debug.h" +#include "string.h" + +static pid_t event__synthesize_comm(pid_t pid, int full, + int (*process)(event_t *event)) +{ + event_t ev; + char filename[PATH_MAX]; + char bf[BUFSIZ]; + FILE *fp; + size_t size = 0; + DIR *tasks; + struct dirent dirent, *next; + pid_t tgid = 0; + + snprintf(filename, sizeof(filename), "/proc/%d/status", pid); + + fp = fopen(filename, "r"); + if (fp == NULL) { +out_race: + /* + * We raced with a task exiting - just return: + */ + pr_debug("couldn't open %s\n", filename); + return 0; + } + + memset(&ev.comm, 0, sizeof(ev.comm)); + while (!ev.comm.comm[0] || !ev.comm.pid) { + if (fgets(bf, sizeof(bf), fp) == NULL) + goto out_failure; + + if (memcmp(bf, "Name:", 5) == 0) { + char *name = bf + 5; + while (*name && isspace(*name)) + ++name; + size = strlen(name) - 1; + memcpy(ev.comm.comm, name, size++); + } else if (memcmp(bf, "Tgid:", 5) == 0) { + char *tgids = bf + 5; + while (*tgids && isspace(*tgids)) + ++tgids; + tgid = ev.comm.pid = atoi(tgids); + } + } + + ev.comm.header.type = PERF_RECORD_COMM; + size = ALIGN(size, sizeof(u64)); + ev.comm.header.size = sizeof(ev.comm) - (sizeof(ev.comm.comm) - size); + + if (!full) { + ev.comm.tid = pid; + + process(&ev); + goto out_fclose; + } + + snprintf(filename, sizeof(filename), "/proc/%d/task", pid); + + tasks = opendir(filename); + if (tasks == NULL) + goto out_race; + + while (!readdir_r(tasks, &dirent, &next) && next) { + char *end; + pid = strtol(dirent.d_name, &end, 10); + if (*end) + continue; + + ev.comm.tid = pid; + + process(&ev); + } + closedir(tasks); + +out_fclose: + fclose(fp); + return tgid; + +out_failure: + pr_warning("couldn't get COMM and pgid, malformed %s\n", filename); + return -1; +} + +static int event__synthesize_mmap_events(pid_t pid, pid_t tgid, + int (*process)(event_t *event)) +{ + char filename[PATH_MAX]; + FILE *fp; + + snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); + + fp = fopen(filename, "r"); + if (fp == NULL) { + /* + * We raced with a task exiting - just return: + */ + pr_debug("couldn't open %s\n", filename); + return -1; + } + + while (1) { + char bf[BUFSIZ], *pbf = bf; + event_t ev = { + .header = { .type = PERF_RECORD_MMAP }, + }; + int n; + size_t size; + if (fgets(bf, sizeof(bf), fp) == NULL) + break; + + /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ + n = hex2u64(pbf, &ev.mmap.start); + if (n < 0) + continue; + pbf += n + 1; + n = hex2u64(pbf, &ev.mmap.len); + if (n < 0) + continue; + pbf += n + 3; + if (*pbf == 'x') { /* vm_exec */ + char *execname = strchr(bf, '/'); + + /* Catch VDSO */ + if (execname == NULL) + execname = strstr(bf, "[vdso]"); + + if (execname == NULL) + continue; + + size = strlen(execname); + execname[size - 1] = '\0'; /* Remove \n */ + memcpy(ev.mmap.filename, execname, size); + size = ALIGN(size, sizeof(u64)); + ev.mmap.len -= ev.mmap.start; + ev.mmap.header.size = (sizeof(ev.mmap) - + (sizeof(ev.mmap.filename) - size)); + ev.mmap.pid = tgid; + ev.mmap.tid = pid; + + process(&ev); + } + } + + fclose(fp); + return 0; +} + +int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)) +{ + pid_t tgid = event__synthesize_comm(pid, 1, process); + if (tgid == -1) + return -1; + return event__synthesize_mmap_events(pid, tgid, process); +} + +void event__synthesize_threads(int (*process)(event_t *event)) +{ + DIR *proc; + struct dirent dirent, *next; + + proc = opendir("/proc"); + + while (!readdir_r(proc, &dirent, &next) && next) { + char *end; + pid_t pid = strtol(dirent.d_name, &end, 10); + + if (*end) /* only interested in proper numerical dirents */ + continue; + + event__synthesize_thread(pid, process); + } + + closedir(proc); +} diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d972b4b..2ae1177 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -111,4 +111,7 @@ struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); +int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); +void event__synthesize_threads(int (*process)(event_t *event)); + #endif /* __PERF_RECORD_H */ -- cgit v0.10.2 From 5b2bb75a0d4b08cd16bc35ecd674f957fc3b0eb7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Oct 2009 19:23:19 -0200 Subject: perf top: Support userspace symbols too Example: Compiling the kernel with 'make -k 22 allyesconfig' [root@emilia linux-2.6-tip]# perf top -r 90 ------------------------------------------------------------------------------ PerfTop: 3669 irqs/sec kernel:59.9% [1000Hz cycles], (all, 8 CPUs) ------------------------------------------------------------------------------ samples pcnt function DSO _______ _____ ________________________________ ________________ 3062.00 6.5% clear_page_c [kernel] 2233.00 4.8% _int_malloc /lib64/libc-2.5.so 2100.00 4.5% yylex /home/acme/git/build/allyesconfig/scripts/genksyms/genksyms 2029.00 4.3% memset /lib64/libc-2.5.so 1224.00 2.6% page_fault [kernel] 1075.00 2.3% __GI_strlen /lib64/libc-2.5.so 863.00 1.8% sub_preempt_count [kernel] 822.00 1.8% __GI_memcpy /lib64/libc-2.5.so 810.00 1.7% __GI_vfprintf /lib64/libc-2.5.so 786.00 1.7% _int_free /lib64/libc-2.5.so 775.00 1.7% __GI_strcmp /lib64/libc-2.5.so 748.00 1.6% _spin_lock [kernel] 699.00 1.5% main /home/acme/git/build/allyesconfig/scripts/basic/fixdep 659.00 1.4% add_preempt_count [kernel] 649.00 1.4% yyparse /home/acme/git/build/allyesconfig/scripts/genksyms/genksyms 645.00 1.4% preempt_trace [kernel] 635.00 1.4% __GI___libc_free /lib64/libc-2.5.so 597.00 1.3% trace_preempt_on [kernel] 551.00 1.2% __GI___libc_malloc /lib64/libc-2.5.so 516.00 1.1% _spin_lock_irqsave [kernel] 481.00 1.0% copy_user_generic_string [kernel] 479.00 1.0% unmap_vmas [kernel] 429.00 0.9% _IO_file_xsputn_internal /lib64/libc-2.5.so 425.00 0.9% __GI_strncpy /lib64/libc-2.5.so 416.00 0.9% get_page_from_freelist [kernel] 414.00 0.9% malloc_consolidate /lib64/libc-2.5.so 406.00 0.9% get_parent_ip [kernel] 362.00 0.8% __rmqueue [kernel] 347.00 0.7% in_lock_functions [kernel] 316.00 0.7% __d_lookup [kernel] [root@emilia linux-2.6-tip]# More polishing is needed to print just DSO basename when not --verbose, etc. Supporting a 'comm' column requires some more reworking of 'perf top' internals as we will need to use something like the hist entries 'perf report' uses and will be done in another patch. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256592199-9608-3-git-send-email-acme@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4a9fe22..a02fc41 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -318,7 +318,7 @@ static void show_details(struct sym_entry *syme) } /* - * Symbols will be added here in record_ip and will get out + * Symbols will be added here in event__process_sample and will get out * after decayed. */ static LIST_HEAD(active_symbols); @@ -459,18 +459,18 @@ static void print_sym_table(void) } if (nr_counters == 1) - printf(" samples pcnt"); + printf(" samples pcnt"); else - printf(" weight samples pcnt"); + printf(" weight samples pcnt"); if (verbose) printf(" RIP "); - printf(" kernel function\n"); - printf(" %s _______ _____", + printf(" function DSO\n"); + printf(" %s _______ _____", nr_counters == 1 ? " " : "______"); if (verbose) - printf(" ________________"); - printf(" _______________\n\n"); + printf(" ________________"); + printf(" ________________________________ ________________\n\n"); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct symbol *sym; @@ -486,16 +486,15 @@ static void print_sym_table(void) sum_ksamples)); if (nr_counters == 1 || !display_weighted) - printf("%20.2f - ", syme->weight); + printf("%20.2f ", syme->weight); else - printf("%9.1f %10ld - ", syme->weight, syme->snap_count); + printf("%9.1f %10ld ", syme->weight, syme->snap_count); percent_color_fprintf(stdout, "%4.1f%%", pcnt); if (verbose) - printf(" - %016llx", sym->start); - printf(" : %s", sym->name); - if (syme->map->dso->name[0] == '[') - printf(" \t%s", syme->map->dso->name); + printf(" %016llx", sym->start); + printf(" %-32s", sym->name); + printf(" %s", syme->map->dso->short_name); printf("\n"); } } @@ -818,41 +817,97 @@ static int parse_symbols(void) return 0; } -/* - * Binary search in the histogram table and record the hit: - */ -static void record_ip(u64 ip, int counter) +static void event__process_sample(const event_t *self, int counter) { + u64 ip = self->ip.ip; struct map *map; - struct symbol *sym = kernel_maps__find_symbol(ip, &map); - - if (sym != NULL) { - struct sym_entry *syme = dso__sym_priv(map->dso, sym); - - if (!syme->skip) { - syme->count[counter]++; - record_precise_ip(syme, counter, ip); - pthread_mutex_lock(&active_symbols_lock); - if (list_empty(&syme->node) || !syme->node.next) - __list_insert_active_sym(syme); - pthread_mutex_unlock(&active_symbols_lock); + struct sym_entry *syme; + struct symbol *sym; + + switch (self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) { + case PERF_RECORD_MISC_USER: { + struct thread *thread = threads__findnew(self->ip.pid); + + if (thread == NULL) return; + + map = thread__find_map(thread, ip); + if (map != NULL) { + ip = map->map_ip(map, ip); + sym = map->dso->find_symbol(map->dso, ip); + if (sym == NULL) + return; + userspace_samples++; + break; } } + /* + * If this is outside of all known maps, + * and is a negative address, try to look it + * up in the kernel dso, as it might be a + * vsyscall or vdso (which executes in user-mode). + */ + if ((long long)ip >= 0) + return; + /* Fall thru */ + case PERF_RECORD_MISC_KERNEL: + sym = kernel_maps__find_symbol(ip, &map); + if (sym == NULL) + return; + break; + default: + return; + } + + syme = dso__sym_priv(map->dso, sym); - samples--; + if (!syme->skip) { + syme->count[counter]++; + record_precise_ip(syme, counter, ip); + pthread_mutex_lock(&active_symbols_lock); + if (list_empty(&syme->node) || !syme->node.next) + __list_insert_active_sym(syme); + pthread_mutex_unlock(&active_symbols_lock); + ++samples; + return; + } } -static void process_event(u64 ip, int counter, int user) +static void event__process_mmap(event_t *self) { - samples++; + struct thread *thread = threads__findnew(self->mmap.pid); + + if (thread != NULL) { + struct map *map = map__new(&self->mmap, NULL, 0, + sizeof(struct sym_entry), + symbol_filter); + if (map != NULL) + thread__insert_map(thread, map); + } +} - if (user) { - userspace_samples++; - return; +static void event__process_comm(event_t *self) +{ + struct thread *thread = threads__findnew(self->comm.pid); + + if (thread != NULL) + thread__set_comm(thread, self->comm.comm); +} + +static int event__process(event_t *event) +{ + switch (event->header.type) { + case PERF_RECORD_COMM: + event__process_comm(event); + break; + case PERF_RECORD_MMAP: + event__process_mmap(event); + break; + default: + break; } - record_ip(ip, counter); + return 0; } struct mmap_data { @@ -925,13 +980,11 @@ static void mmap_read_counter(struct mmap_data *md) event = &event_copy; } + if (event->header.type == PERF_RECORD_SAMPLE) + event__process_sample(event, md->counter); + else + event__process(event); old += size; - - if (event->header.type == PERF_RECORD_SAMPLE) { - int user = - (event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER; - process_event(event->ip.ip, md->counter, user); - } } md->prev = old; @@ -973,6 +1026,7 @@ static void start_counter(int i, int counter) } attr->inherit = (cpu < 0) && inherit; + attr->mmap = 1; try_again: fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); @@ -1031,6 +1085,11 @@ static int __cmd_top(void) int i, counter; int ret; + if (target_pid != -1) + event__synthesize_thread(target_pid, event__process); + else + event__synthesize_threads(event__process); + for (i = 0; i < nr_cpus; i++) { group_fd = -1; for (counter = 0; counter < nr_counters; counter++) -- cgit v0.10.2 From 85df6f683efa457440eb922272fd5a71aa022ad4 Mon Sep 17 00:00:00 2001 From: Marti Raudsepp Date: Tue, 27 Oct 2009 00:33:04 +0000 Subject: perf tools: Notify user when unrecognized event is specified Previously no indication was given about what went wrong. Signed-off-by: Marti Raudsepp Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <03ec9ee96f17cef05424.1256603584@localhost> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index b097570..e9e6d5c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -678,6 +678,8 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr) if (ret != EVT_FAILED) goto modifier; + fprintf(stderr, "invalid or unsupported event: '%s'\n", *str); + fprintf(stderr, "Run 'perf list' for a list of valid events\n"); return EVT_FAILED; modifier: -- cgit v0.10.2 From 689d30187828afe1faedf050b2f7593515b90c76 Mon Sep 17 00:00:00 2001 From: Marti Raudsepp Date: Tue, 27 Oct 2009 00:33:05 +0000 Subject: perf tools: Output 'perf list' to stdout not stderr Writing to stdout is probably the expected behavior because the user explicitly asked for a list. Signed-off-by: Marti Raudsepp Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4ebb59420ef057972167.1256603585@localhost> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e9e6d5c..31baa5a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -806,7 +806,7 @@ static void print_tracepoint_events(void) for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); - fprintf(stderr, " %-42s [%s]\n", evt_path, + printf(" %-42s [%s]\n", evt_path, event_type_descriptors[PERF_TYPE_TRACEPOINT+1]); } closedir(evt_dir); @@ -823,8 +823,8 @@ void print_events(void) unsigned int i, type, op, prev_type = -1; char name[40]; - fprintf(stderr, "\n"); - fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); + printf("\n"); + printf("List of pre-defined events (to be used in -e):\n"); for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { type = syms->type + 1; @@ -832,19 +832,19 @@ void print_events(void) type = 0; if (type != prev_type) - fprintf(stderr, "\n"); + printf("\n"); if (strlen(syms->alias)) sprintf(name, "%s OR %s", syms->symbol, syms->alias); else strcpy(name, syms->symbol); - fprintf(stderr, " %-42s [%s]\n", name, + printf(" %-42s [%s]\n", name, event_type_descriptors[type]); prev_type = type; } - fprintf(stderr, "\n"); + printf("\n"); for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ @@ -852,17 +852,17 @@ void print_events(void) continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - fprintf(stderr, " %-42s [%s]\n", + printf(" %-42s [%s]\n", event_cache_name(type, op, i), event_type_descriptors[4]); } } } - fprintf(stderr, "\n"); - fprintf(stderr, " %-42s [raw hardware event descriptor]\n", + printf("\n"); + printf(" %-42s [raw hardware event descriptor]\n", "rNNN"); - fprintf(stderr, "\n"); + printf("\n"); print_tracepoint_events(); -- cgit v0.10.2 From 6f9b41006af1bc489030f84ee247abc0df1edccd Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 27 Oct 2009 11:01:38 +0100 Subject: x86, apic: Clear APIC Timer Initial Count Register on shutdown Commit a98f8fd24fb24fcb9a359553e64dd6aac5cf4279 (x86: apic reset counter on shutdown) set the counter to max to avoid spurious interrupts when the timer is re-enabled. (In theory) you'll still get a spurious interrupt if spending more than 344 seconds with this interrupt disabled and then unmasking it. The right thing to do is to clear the register. This disables the interrupt from happening (at least it does on AMD hardware). Signed-off-by: Andreas Herrmann LKML-Reference: <20091027100138.GB30802@alberich.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index dce93d4..4c689f4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -444,7 +444,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, v = apic_read(APIC_LVTT); v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); apic_write(APIC_LVTT, v); - apic_write(APIC_TMICT, 0xffffffff); + apic_write(APIC_TMICT, 0); break; case CLOCK_EVT_MODE_RESUME: /* Nothing to do here */ -- cgit v0.10.2 From 2c28e2451dba2260e9f88811b29a7787db7e7616 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 26 Oct 2009 13:57:44 -0700 Subject: rcu: Fix TINY_RCU #elif condition Some compilers are happy with "#elif CONFIG_RCU_TINY", while others strongly prefer "#elif defined(CONFIG_RCU_TINY)". Change to the latter to make more compilers happy. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12565906642768-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6dd71fa..2f1bc42 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -77,7 +77,7 @@ extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif CONFIG_TINY_RCU +#elif defined(CONFIG_TINY_RCU) #include #else #error "Unknown RCU implementation specified to kernel configuration" -- cgit v0.10.2 From d6ba452128178091dab7a04d54f7e66fdc32fb39 Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Mon, 26 Oct 2009 09:26:18 -0400 Subject: tpm add default function definitions Add default tpm_pcr_read/extend function definitions required by IMA/Kconfig changes. Signed-off-by: Mimi Zohar Reviewed-by: Eric Paris Signed-off-by: James Morris diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 3338b3f..8eaa8f8 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -31,5 +31,12 @@ extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); +#else +static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { + return -ENODEV; +} +static inline int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) { + return -ENODEV; +} #endif #endif -- cgit v0.10.2 From bc284e5d9d6da48934a177db92bf8e09b96a9cb8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 21 Sep 2009 16:56:10 +0000 Subject: powerpc: perf_event: Log invalid data addresses as all 1s When we take an exception and the SDAR isn't synchronised we currently log 0 as the address. Unfortunately this is a pretty common value, so use ~0UL instead. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index bbcbae1..73c3b73 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1162,7 +1162,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, */ if (record) { struct perf_sample_data data = { - .addr = 0, + .addr = ~0ULL, .period = event->hw.last_period, }; -- cgit v0.10.2 From 81cd5ae303e88a1e9d3a3e0f1fe8abd100edde16 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 27 Oct 2009 18:31:29 +0000 Subject: powerpc: perf_event: Enable SDAR in continous sample mode In continuous sampling mode we want the SDAR to update. While we can select between dcache misses and ERAT (L1-TLB) misses, a decent default is to enable both. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 6315edc..bc8dd53 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -489,6 +489,8 @@ #define SPRN_MMCR1 798 #define SPRN_MMCRA 0x312 #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ +#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL +#define MMCRA_SDAR_ERAT_MISS 0x20000000UL #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c index 0f4c1c7..199de52 100644 --- a/arch/powerpc/kernel/power5+-pmu.c +++ b/arch/powerpc/kernel/power5+-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index c351b3a..98b6a72 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c @@ -73,10 +73,6 @@ #define MMCR1_PMCSEL_MSK 0x7f /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -390,7 +386,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, byte, psel; unsigned int ttm, grp; int i, isbus, bit, grsel; diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index ca399ba..84a607b 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -178,7 +178,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; int i; unsigned int pmc, ev, b, u, s, psel; unsigned int ttmset = 0; diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 28a4daa..852f7b7 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -51,10 +51,6 @@ #define MMCR1_PMCSEL_MSK 0xff /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 @@ -230,7 +226,7 @@ static int power7_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[]) { unsigned long mmcr1 = 0; - unsigned long mmcra = 0; + unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS; unsigned int pmc, unit, combine, l2sel, psel; unsigned int pmc_inuse = 0; int i; diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 4795744..8eff48e 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -84,10 +84,6 @@ static short mmcr1_adder_bits[8] = { }; /* - * Bits in MMCRA - */ - -/* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 * 3210987654321098765432109876543210987654321098765432109876543210 -- cgit v0.10.2 From f7d7986060b2890fc26db6ab5203efbd33aa2497 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 01:09:29 +0000 Subject: perf_event: Add alignment-faults and emulation-faults software events Add two more software events that are common to many cpus. Alignment faults: When a load or store is not aligned properly. Emulation faults: When an instruction is emulated in software. Both cause a very significant slowdown (100x or worse), so identifying and fixing them is very important. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 7b7fbf4..d6b95d1 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -106,6 +106,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2e6d95f..a33707a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -102,6 +102,8 @@ enum perf_sw_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_MAX, /* non-ABI */ }; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9d0b5c6..0683b33 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4186,6 +4186,8 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: + case PERF_COUNT_SW_ALIGNMENT_FAULTS: + case PERF_COUNT_SW_EMULATION_FAULTS: if (!event->parent) { atomic_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; diff --git a/tools/perf/design.txt b/tools/perf/design.txt index fdd42a8..f000c30 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -137,6 +137,8 @@ enum sw_event_ids { PERF_COUNT_SW_CPU_MIGRATIONS = 4, PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, }; Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8cfb48c..34bd8442 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -46,6 +46,8 @@ static struct event_symbol event_symbols[] = { { CSW(PAGE_FAULTS_MAJ), "major-faults", "" }, { CSW(CONTEXT_SWITCHES), "context-switches", "cs" }, { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" }, + { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" }, + { CSW(EMULATION_FAULTS), "emulation-faults", "" }, }; #define __PERF_EVENT_FIELD(config, name) \ @@ -74,6 +76,8 @@ static const char *sw_event_names[] = { "CPU-migrations", "minor-faults", "major-faults", + "alignment-faults", + "emulation-faults", }; #define MAX_ALIASES 8 -- cgit v0.10.2 From eecff81d1fcda22cd0029d11fe2a71dceed11dad Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 27 Oct 2009 18:46:55 +0000 Subject: powerpc: Create PPC_WARN_ALIGNMENT to match PPC_WARN_EMULATED perf_event wants a separate event for alignment and emulation faults, so create another emulation event. This will make it easy to hook in perf_event at one spot. We pass in regs which will be required for these events. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index 9154e85..640c4e4 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -57,7 +57,7 @@ extern u32 ppc_warn_emulated; extern void ppc_warn_emulated_print(const char *type); -#define PPC_WARN_EMULATED(type) \ +#define __PPC_WARN_EMULATED(type) \ do { \ atomic_inc(&ppc_emulated.type.val); \ if (ppc_warn_emulated) \ @@ -66,8 +66,11 @@ extern void ppc_warn_emulated_print(const char *type); #else /* !CONFIG_PPC_EMULATED_STATS */ -#define PPC_WARN_EMULATED(type) do { } while (0) +#define __PPC_WARN_EMULATED(type) do { } while (0) #endif /* !CONFIG_PPC_EMULATED_STATS */ +#define PPC_WARN_EMULATED(type, regs) __PPC_WARN_EMULATED(type) +#define PPC_WARN_ALIGNMENT(type, regs) __PPC_WARN_EMULATED(type) + #endif /* _ASM_POWERPC_EMULATED_OPS_H */ diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index a5b632e5..3839839 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -732,7 +732,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_SPE if ((instr >> 26) == 0x4) { - PPC_WARN_EMULATED(spe); + PPC_WARN_ALIGNMENT(spe, regs); return emulate_spe(regs, reg, instr); } #endif @@ -786,7 +786,7 @@ int fix_alignment(struct pt_regs *regs) flags |= SPLT; nb = 8; } - PPC_WARN_EMULATED(vsx); + PPC_WARN_ALIGNMENT(vsx, regs); return emulate_vsx(addr, reg, areg, regs, flags, nb); } #endif @@ -794,7 +794,7 @@ int fix_alignment(struct pt_regs *regs) * the exception of DCBZ which is handled as a special case here */ if (instr == DCBZ) { - PPC_WARN_EMULATED(dcbz); + PPC_WARN_ALIGNMENT(dcbz, regs); return emulate_dcbz(regs, addr); } if (unlikely(nb == 0)) @@ -804,7 +804,7 @@ int fix_alignment(struct pt_regs *regs) * function */ if (flags & M) { - PPC_WARN_EMULATED(multiple); + PPC_WARN_ALIGNMENT(multiple, regs); return emulate_multiple(regs, addr, reg, nb, flags, instr, swiz); } @@ -825,11 +825,11 @@ int fix_alignment(struct pt_regs *regs) /* Special case for 16-byte FP loads and stores */ if (nb == 16) { - PPC_WARN_EMULATED(fp_pair); + PPC_WARN_ALIGNMENT(fp_pair, regs); return emulate_fp_pair(addr, reg, flags); } - PPC_WARN_EMULATED(unaligned); + PPC_WARN_ALIGNMENT(unaligned, regs); /* If we are loading, get the data from user space, else * get it from register values diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6f0ae1a..9d1f935 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -759,7 +759,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate the mfspr rD, PVR. */ if ((instword & PPC_INST_MFSPR_PVR_MASK) == PPC_INST_MFSPR_PVR) { - PPC_WARN_EMULATED(mfpvr); + PPC_WARN_EMULATED(mfpvr, regs); rd = (instword >> 21) & 0x1f; regs->gpr[rd] = mfspr(SPRN_PVR); return 0; @@ -767,7 +767,7 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulating the dcba insn is just a no-op. */ if ((instword & PPC_INST_DCBA_MASK) == PPC_INST_DCBA) { - PPC_WARN_EMULATED(dcba); + PPC_WARN_EMULATED(dcba, regs); return 0; } @@ -776,7 +776,7 @@ static int emulate_instruction(struct pt_regs *regs) int shift = (instword >> 21) & 0x1c; unsigned long msk = 0xf0000000UL >> shift; - PPC_WARN_EMULATED(mcrxr); + PPC_WARN_EMULATED(mcrxr, regs); regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); regs->xer &= ~0xf0000000UL; return 0; @@ -784,19 +784,19 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate load/store string insn. */ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { - PPC_WARN_EMULATED(string); + PPC_WARN_EMULATED(string, regs); return emulate_string_inst(regs, instword); } /* Emulate the popcntb (Population Count Bytes) instruction. */ if ((instword & PPC_INST_POPCNTB_MASK) == PPC_INST_POPCNTB) { - PPC_WARN_EMULATED(popcntb); + PPC_WARN_EMULATED(popcntb, regs); return emulate_popcntb_inst(regs, instword); } /* Emulate isel (Integer Select) instruction */ if ((instword & PPC_INST_ISEL_MASK) == PPC_INST_ISEL) { - PPC_WARN_EMULATED(isel); + PPC_WARN_EMULATED(isel, regs); return emulate_isel(regs, instword); } @@ -995,7 +995,7 @@ void SoftwareEmulation(struct pt_regs *regs) #ifdef CONFIG_MATH_EMULATION errcode = do_mathemu(regs); if (errcode >= 0) - PPC_WARN_EMULATED(math); + PPC_WARN_EMULATED(math, regs); switch (errcode) { case 0: @@ -1018,7 +1018,7 @@ void SoftwareEmulation(struct pt_regs *regs) #elif defined(CONFIG_8XX_MINIMAL_FPEMU) errcode = Soft_emulate_8xx(regs); if (errcode >= 0) - PPC_WARN_EMULATED(8xx); + PPC_WARN_EMULATED(8xx, regs); switch (errcode) { case 0: @@ -1129,7 +1129,7 @@ void altivec_assist_exception(struct pt_regs *regs) flush_altivec_to_thread(current); - PPC_WARN_EMULATED(altivec); + PPC_WARN_EMULATED(altivec, regs); err = emulate_altivec(regs); if (err == 0) { regs->nip += 4; /* skip emulated instruction */ -- cgit v0.10.2 From 196f02bf900c5eb6f85d889c4f70e7cc11fda7e8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 01:13:00 +0000 Subject: powerpc: perf_event: Add alignment-faults and emulation-faults software events Hook up the alignment-faults and emulation-faults events for powerpc. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index 640c4e4..f0fb4fc 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -19,6 +19,7 @@ #define _ASM_POWERPC_EMULATED_OPS_H #include +#include #ifdef CONFIG_PPC_EMULATED_STATS @@ -70,7 +71,18 @@ extern void ppc_warn_emulated_print(const char *type); #endif /* !CONFIG_PPC_EMULATED_STATS */ -#define PPC_WARN_EMULATED(type, regs) __PPC_WARN_EMULATED(type) -#define PPC_WARN_ALIGNMENT(type, regs) __PPC_WARN_EMULATED(type) +#define PPC_WARN_EMULATED(type, regs) \ + do { \ + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, \ + 1, 0, regs, 0); \ + __PPC_WARN_EMULATED(type); \ + } while (0) + +#define PPC_WARN_ALIGNMENT(type, regs) \ + do { \ + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, \ + 1, 0, regs, regs->dar); \ + __PPC_WARN_EMULATED(type); \ + } while (0) #endif /* _ASM_POWERPC_EMULATED_OPS_H */ -- cgit v0.10.2 From 1bf4af165050d90ea6659ffb2536ec8ca783aab5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Oct 2009 18:47:42 +0000 Subject: powerpc: tracing: Add powerpc tracepoints for interrupt entry and exit This adds powerpc-specific tracepoints for interrupt entry and exit. While we already have generic irq_handler_entry and irq_handler_exit tracepoints there are cases on our virtualised powerpc machines where an interrupt is presented to the OS, but subsequently handled by the hypervisor. This means no OS interrupt handler is invoked. Here is an example on a POWER6 machine with the patch below applied: -0 [006] 3243.949840744: irq_entry: pt_regs=c0000000ce31fb10 -0 [006] 3243.949850520: irq_exit: pt_regs=c0000000ce31fb10 -0 [007] 3243.950218208: irq_entry: pt_regs=c0000000ce323b10 -0 [007] 3243.950224080: irq_exit: pt_regs=c0000000ce323b10 -0 [000] 3244.021879320: irq_entry: pt_regs=c000000000a63aa0 -0 [000] 3244.021883616: irq_handler_entry: irq=87 handler=eth0 -0 [000] 3244.021887328: irq_handler_exit: irq=87 return=handled -0 [000] 3244.021897408: irq_exit: pt_regs=c000000000a63aa0 Here we see two phantom interrupts (no handler was invoked), followed by a real interrupt for eth0. Without the tracepoints in this patch we would have missed the phantom interrupts. Signed-off-by: Anton Blanchard Acked-by: Steven Rostedt Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h new file mode 100644 index 0000000..187696d --- /dev/null +++ b/arch/powerpc/include/asm/trace.h @@ -0,0 +1,53 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM powerpc + +#if !defined(_TRACE_POWERPC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWERPC_H + +#include + +struct pt_regs; + +TRACE_EVENT(irq_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(irq_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +#endif /* _TRACE_POWERPC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH asm +#define TRACE_INCLUDE_FILE trace + +#include diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index e5d1211..02a3346 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -70,6 +70,8 @@ #include #include #endif +#define CREATE_TRACE_POINTS +#include int __irq_offset_value; static int ppc_spurious_interrupts; @@ -325,6 +327,8 @@ void do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); unsigned int irq; + trace_irq_entry(regs); + irq_enter(); check_stack_overflow(); @@ -348,6 +352,8 @@ void do_IRQ(struct pt_regs *regs) timer_interrupt(regs); } #endif + + trace_irq_exit(regs); } void __init init_IRQ(void) -- cgit v0.10.2 From 6795b85c6a4f690e61e7be31aa150d945c723fb5 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Oct 2009 18:49:14 +0000 Subject: powerpc: tracing: Add powerpc tracepoints for timer entry and exit We can monitor the effectiveness of our power management of both the kernel and hypervisor by probing the timer interrupt. For example, on this box we see 10.37s timer interrupts on an idle core: -0 [010] 3900.671297: timer_interrupt_entry: pt_regs=c0000000ce1e7b10 -0 [010] 3900.671302: timer_interrupt_exit: pt_regs=c0000000ce1e7b10 -0 [010] 3911.042963: timer_interrupt_entry: pt_regs=c0000000ce1e7b10 -0 [010] 3911.042968: timer_interrupt_exit: pt_regs=c0000000ce1e7b10 -0 [010] 3921.414630: timer_interrupt_entry: pt_regs=c0000000ce1e7b10 -0 [010] 3921.414635: timer_interrupt_exit: pt_regs=c0000000ce1e7b10 Since we have a 207MHz decrementer it will go negative and fire every 10.37s even if Linux is completely idle. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 187696d..b558c31 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -42,6 +42,40 @@ TRACE_EVENT(irq_exit, TP_printk("pt_regs=%p", __entry->regs) ); +TRACE_EVENT(timer_interrupt_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + +TRACE_EVENT(timer_interrupt_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs), + + TP_STRUCT__entry( + __field(struct pt_regs *, regs) + ), + + TP_fast_assign( + __entry->regs = regs; + ), + + TP_printk("pt_regs=%p", __entry->regs) +); + #endif /* _TRACE_POWERPC_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 92dc844..d6e88df 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -571,6 +572,8 @@ void timer_interrupt(struct pt_regs * regs) struct clock_event_device *evt = &decrementer->event; u64 now; + trace_timer_interrupt_entry(regs); + /* Ensure a positive value is written to the decrementer, or else * some CPUs will continuue to take decrementer exceptions */ set_dec(DECREMENTER_MAX); @@ -590,6 +593,7 @@ void timer_interrupt(struct pt_regs * regs) now = decrementer->next_tb - now; if (now <= DECREMENTER_MAX) set_dec((int)now); + trace_timer_interrupt_exit(regs); return; } old_regs = set_irq_regs(regs); @@ -620,6 +624,8 @@ void timer_interrupt(struct pt_regs * regs) irq_exit(); set_irq_regs(old_regs); + + trace_timer_interrupt_exit(regs); } void wakeup_decrementer(void) -- cgit v0.10.2 From c8cd093a6e9f96ea6b871576fd4e46d7c818bb89 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Oct 2009 18:50:29 +0000 Subject: powerpc: tracing: Add hypervisor call tracepoints Add hcall_entry and hcall_exit tracepoints. This replaces the inline assembly HCALL_STATS code and converts it to use the new tracepoints. To keep the disabled case as quick as possible, we embed a status word in the TOC so we can get at it with a single load. By doing so we keep the overhead at a minimum. Time taken for a null hcall: No tracepoint code: 135.79 cycles Disabled tracepoints: 137.95 cycles For reference, before this patch enabling HCALL_STATS resulted in a null hcall of 201.44 cycles! Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 3b10051..bf3382f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -46,7 +46,7 @@ config DEBUG_STACK_USAGE config HCALL_STATS bool "Hypervisor call instrumentation" - depends on PPC_PSERIES && DEBUG_FS + depends on PPC_PSERIES && DEBUG_FS && TRACEPOINTS help Adds code to keep track of the number of hypervisor calls made and the amount of time spent in hypervisor calls. Wall time spent in diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 6251a4b..c27caac 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -274,6 +274,8 @@ struct hcall_stats { unsigned long num_calls; /* number of calls (on this CPU) */ unsigned long tb_total; /* total wall time (mftb) of calls. */ unsigned long purr_total; /* total cpu time (PURR) of calls. */ + unsigned long tb_start; + unsigned long purr_start; }; #define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1) diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index b558c31..9b01c0e 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -76,6 +76,51 @@ TRACE_EVENT(timer_interrupt_exit, TP_printk("pt_regs=%p", __entry->regs) ); +#ifdef CONFIG_PPC_PSERIES +extern void hcall_tracepoint_regfunc(void); +extern void hcall_tracepoint_unregfunc(void); + +TRACE_EVENT_FN(hcall_entry, + + TP_PROTO(unsigned long opcode), + + TP_ARGS(opcode), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + ), + + TP_fast_assign( + __entry->opcode = opcode; + ), + + TP_printk("opcode=%lu", __entry->opcode), + + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc +); + +TRACE_EVENT_FN(hcall_exit, + + TP_PROTO(unsigned long opcode, unsigned long retval), + + TP_ARGS(opcode, retval), + + TP_STRUCT__entry( + __field(unsigned long, opcode) + __field(unsigned long, retval) + ), + + TP_fast_assign( + __entry->opcode = opcode; + __entry->retval = retval; + ), + + TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval), + + hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc +); +#endif + #endif /* _TRACE_POWERPC_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3..01e95ab 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -14,20 +14,54 @@ #define STK_PARM(i) (48 + ((i)-3)*8) -#ifdef CONFIG_HCALL_STATS +#ifdef CONFIG_TRACEPOINTS + + .section ".toc","aw" + + .globl hcall_tracepoint_refcount +hcall_tracepoint_refcount: + .llong 0 + + .section ".text" + /* * precall must preserve all registers. use unused STK_PARM() - * areas to save snapshots and opcode. + * areas to save snapshots and opcode. We branch around this + * in early init (eg when populating the MMU hashtable) by using an + * unconditional cpu feature. */ #define HCALL_INST_PRECALL \ - std r3,STK_PARM(r3)(r1); /* save opcode */ \ - mftb r0; /* get timebase and */ \ - std r0,STK_PARM(r5)(r1); /* save for later */ \ BEGIN_FTR_SECTION; \ - mfspr r0,SPRN_PURR; /* get PURR and */ \ - std r0,STK_PARM(r6)(r1); /* save for later */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); - + b 1f; \ +END_FTR_SECTION(0, 1); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + beq+ 1f; \ + mflr r0; \ + std r3,STK_PARM(r3)(r1); \ + std r4,STK_PARM(r4)(r1); \ + std r5,STK_PARM(r5)(r1); \ + std r6,STK_PARM(r6)(r1); \ + std r7,STK_PARM(r7)(r1); \ + std r8,STK_PARM(r8)(r1); \ + std r9,STK_PARM(r9)(r1); \ + std r10,STK_PARM(r10)(r1); \ + std r0,16(r1); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl .__trace_hcall_entry; \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,16(r1); \ + ld r3,STK_PARM(r3)(r1); \ + ld r4,STK_PARM(r4)(r1); \ + ld r5,STK_PARM(r5)(r1); \ + ld r6,STK_PARM(r6)(r1); \ + ld r7,STK_PARM(r7)(r1); \ + ld r8,STK_PARM(r8)(r1); \ + ld r9,STK_PARM(r9)(r1); \ + ld r10,STK_PARM(r10)(r1); \ + mtlr r0; \ +1: + /* * postcall is performed immediately before function return which * allows liberal use of volatile registers. We branch around this @@ -38,40 +72,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_PURR); BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ - cmpldi cr7,r4,MAX_HCALL_OPCODE; \ - bgt- cr7,1f; \ - \ - /* get time and PURR snapshots after hcall */ \ - mftb r7; /* timebase after */ \ -BEGIN_FTR_SECTION; \ - mfspr r8,SPRN_PURR; /* PURR after */ \ - ld r6,STK_PARM(r6)(r1); /* PURR before */ \ - subf r6,r6,r8; /* delta */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ - ld r5,STK_PARM(r5)(r1); /* timebase before */ \ - subf r5,r5,r7; /* time delta */ \ - \ - /* calculate address of stat structure r4 = opcode */ \ - srdi r4,r4,2; /* index into array */ \ - mulli r4,r4,HCALL_STAT_SIZE; \ - LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ - add r4,r4,r7; \ - ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ - add r4,r4,r7; \ - \ - /* update stats */ \ - ld r7,HCALL_STAT_CALLS(r4); /* count */ \ - addi r7,r7,1; \ - std r7,HCALL_STAT_CALLS(r4); \ - ld r7,HCALL_STAT_TB(r4); /* timebase */ \ - add r7,r7,r5; \ - std r7,HCALL_STAT_TB(r4); \ -BEGIN_FTR_SECTION; \ - ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ - add r7,r7,r6; \ - std r7,HCALL_STAT_PURR(r4); \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ + ld r12,hcall_tracepoint_refcount@toc(r2); \ + cmpdi r12,0; \ + beq+ 1f; \ + mflr r0; \ + ld r6,STK_PARM(r3)(r1); \ + std r3,STK_PARM(r3)(r1); \ + mr r4,r3; \ + mr r3,r6; \ + std r0,16(r1); \ + stdu r1,-STACK_FRAME_OVERHEAD(r1); \ + bl .__trace_hcall_exit; \ + addi r1,r1,STACK_FRAME_OVERHEAD; \ + ld r0,16(r1); \ + ld r3,STK_PARM(r3)(r1); \ + mtlr r0; \ 1: #else #define HCALL_INST_PRECALL diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 3631a4f..e44e103 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -26,6 +26,7 @@ #include #include #include +#include DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); @@ -100,6 +101,34 @@ static const struct file_operations hcall_inst_seq_fops = { #define HCALL_ROOT_DIR "hcall_inst" #define CPU_NAME_BUF_SIZE 32 + +static void probe_hcall_entry(unsigned long opcode) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = &get_cpu_var(hcall_stats)[opcode / 4]; + h->tb_start = mftb(); + h->purr_start = mfspr(SPRN_PURR); +} + +static void probe_hcall_exit(unsigned long opcode, unsigned long retval) +{ + struct hcall_stats *h; + + if (opcode > MAX_HCALL_OPCODE) + return; + + h = &__get_cpu_var(hcall_stats)[opcode / 4]; + h->num_calls++; + h->tb_total = mftb() - h->tb_start; + h->purr_total = mfspr(SPRN_PURR) - h->purr_start; + + put_cpu_var(hcall_stats); +} + static int __init hcall_inst_init(void) { struct dentry *hcall_root; @@ -110,6 +139,14 @@ static int __init hcall_inst_init(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) return 0; + if (register_trace_hcall_entry(probe_hcall_entry)) + return -EINVAL; + + if (register_trace_hcall_exit(probe_hcall_exit)) { + unregister_trace_hcall_entry(probe_hcall_entry); + return -EINVAL; + } + hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); if (!hcall_root) return -ENOMEM; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 903eb9e..4b7b6e8 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "plpar_wrappers.h" #include "pseries.h" @@ -661,3 +662,34 @@ void arch_free_page(struct page *page, int order) EXPORT_SYMBOL(arch_free_page); #endif + +#ifdef CONFIG_TRACEPOINTS +/* + * We optimise our hcall path by placing hcall_tracepoint_refcount + * directly in the TOC so we can check if the hcall tracepoints are + * enabled via a single load. + */ + +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ +extern long hcall_tracepoint_refcount; + +void hcall_tracepoint_regfunc(void) +{ + hcall_tracepoint_refcount++; +} + +void hcall_tracepoint_unregfunc(void) +{ + hcall_tracepoint_refcount--; +} + +void __trace_hcall_entry(unsigned long opcode) +{ + trace_hcall_entry(opcode); +} + +void __trace_hcall_exit(long opcode, unsigned long retval) +{ + trace_hcall_exit(opcode, retval); +} +#endif -- cgit v0.10.2 From 6f26353ca29e96475208bce673efb6a2c58b73f2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Oct 2009 18:51:09 +0000 Subject: powerpc: tracing: Give hypervisor call tracepoints access to arguments While most users of the hcall tracepoints will only want the opcode and return code, some will want all the arguments. To avoid the complexity of using varargs we pass a pointer to the register save area, which contains all the arguments. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 9b01c0e..cbe2297 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -82,9 +82,9 @@ extern void hcall_tracepoint_unregfunc(void); TRACE_EVENT_FN(hcall_entry, - TP_PROTO(unsigned long opcode), + TP_PROTO(unsigned long opcode, unsigned long *args), - TP_ARGS(opcode), + TP_ARGS(opcode, args), TP_STRUCT__entry( __field(unsigned long, opcode) @@ -101,9 +101,10 @@ TRACE_EVENT_FN(hcall_entry, TRACE_EVENT_FN(hcall_exit, - TP_PROTO(unsigned long opcode, unsigned long retval), + TP_PROTO(unsigned long opcode, unsigned long retval, + unsigned long *retbuf), - TP_ARGS(opcode, retval), + TP_ARGS(opcode, retval, retbuf), TP_STRUCT__entry( __field(unsigned long, opcode) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 01e95ab..383a5d0 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -30,7 +30,7 @@ hcall_tracepoint_refcount: * in early init (eg when populating the MMU hashtable) by using an * unconditional cpu feature. */ -#define HCALL_INST_PRECALL \ +#define HCALL_INST_PRECALL(FIRST_REG) \ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ @@ -47,6 +47,7 @@ END_FTR_SECTION(0, 1); \ std r9,STK_PARM(r9)(r1); \ std r10,STK_PARM(r10)(r1); \ std r0,16(r1); \ + addi r4,r1,STK_PARM(FIRST_REG); \ stdu r1,-STACK_FRAME_OVERHEAD(r1); \ bl .__trace_hcall_entry; \ addi r1,r1,STACK_FRAME_OVERHEAD; \ @@ -68,7 +69,7 @@ END_FTR_SECTION(0, 1); \ * in early init (eg when populating the MMU hashtable) by using an * unconditional cpu feature. */ -#define HCALL_INST_POSTCALL \ +#define __HCALL_INST_POSTCALL \ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ @@ -88,9 +89,19 @@ END_FTR_SECTION(0, 1); \ ld r3,STK_PARM(r3)(r1); \ mtlr r0; \ 1: + +#define HCALL_INST_POSTCALL_NORETS \ + li r5,0; \ + __HCALL_INST_POSTCALL + +#define HCALL_INST_POSTCALL(BUFREG) \ + mr r5,BUFREG; \ + __HCALL_INST_POSTCALL + #else -#define HCALL_INST_PRECALL -#define HCALL_INST_POSTCALL +#define HCALL_INST_PRECALL(FIRST_ARG) +#define HCALL_INST_POSTCALL_NORETS +#define HCALL_INST_POSTCALL(BUFREG) #endif .text @@ -101,11 +112,11 @@ _GLOBAL(plpar_hcall_norets) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r4) HVSC /* invoke the hypervisor */ - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL_NORETS lwz r0,8(r1) mtcrf 0xff,r0 @@ -117,7 +128,7 @@ _GLOBAL(plpar_hcall) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r5) std r4,STK_PARM(r4)(r1) /* Save ret buffer */ @@ -136,7 +147,7 @@ _GLOBAL(plpar_hcall) std r6, 16(r12) std r7, 24(r12) - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 @@ -183,7 +194,7 @@ _GLOBAL(plpar_hcall9) mfcr r0 stw r0,8(r1) - HCALL_INST_PRECALL + HCALL_INST_PRECALL(r5) std r4,STK_PARM(r4)(r1) /* Save ret buffer */ @@ -211,7 +222,7 @@ _GLOBAL(plpar_hcall9) std r11,56(r12) std r0, 64(r12) - HCALL_INST_POSTCALL + HCALL_INST_POSTCALL(r12) lwz r0,8(r1) mtcrf 0xff,r0 diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index e44e103..2f58c71 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c @@ -102,7 +102,7 @@ static const struct file_operations hcall_inst_seq_fops = { #define CPU_NAME_BUF_SIZE 32 -static void probe_hcall_entry(unsigned long opcode) +static void probe_hcall_entry(unsigned long opcode, unsigned long *args) { struct hcall_stats *h; @@ -114,7 +114,8 @@ static void probe_hcall_entry(unsigned long opcode) h->purr_start = mfspr(SPRN_PURR); } -static void probe_hcall_exit(unsigned long opcode, unsigned long retval) +static void probe_hcall_exit(unsigned long opcode, unsigned long retval, + unsigned long *retbuf) { struct hcall_stats *h; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 4b7b6e8..0707653 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -683,13 +683,14 @@ void hcall_tracepoint_unregfunc(void) hcall_tracepoint_refcount--; } -void __trace_hcall_entry(unsigned long opcode) +void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { - trace_hcall_entry(opcode); + trace_hcall_entry(opcode, args); } -void __trace_hcall_exit(long opcode, unsigned long retval) +void __trace_hcall_exit(long opcode, unsigned long retval, + unsigned long *retbuf) { - trace_hcall_exit(opcode, retval); + trace_hcall_exit(opcode, retval, retbuf); } #endif -- cgit v0.10.2 From b3c86ee6d128dea7c671380090488887e73fa774 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Oct 2009 18:51:57 +0000 Subject: powerpc: Disable HCALL_STATS by default The overhead of HCALL_STATS is quite high and the functionality is very rarely used. Key statistics are also missing (eg min/max). With the new hcall tracepoints much more powerful tracing can be done in a kernel module. Lets disable this by default. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index f1889ab..c568329 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -1683,7 +1683,7 @@ CONFIG_HAVE_ARCH_KGDB=y CONFIG_DEBUG_STACKOVERFLOW=y # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_DEBUG_PAGEALLOC is not set -CONFIG_HCALL_STATS=y +# CONFIG_HCALL_STATS is not set # CONFIG_CODE_PATCHING_SELFTEST is not set # CONFIG_FTR_FIXUP_SELFTEST is not set # CONFIG_MSI_BITMAP_SELFTEST is not set -- cgit v0.10.2 From 907b1f45d901c956e4bcd3f27c4f1f25d6fb36b2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Oct 2009 18:52:24 +0000 Subject: powerpc: Export powerpc_debugfs_root Kernel modules should be able to place their debug output inside our powerpc debugfs directory. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4271f7a..845c72a 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -660,6 +660,7 @@ late_initcall(check_cache_coherency); #ifdef CONFIG_DEBUG_FS struct dentry *powerpc_debugfs_root; +EXPORT_SYMBOL(powerpc_debugfs_root); static int powerpc_debugfs_init(void) { -- cgit v0.10.2 From 3cd980dbc1050889acca7306cbcedf79a4ba2f81 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 01:23:28 +0000 Subject: powerpc: perf_event: Cleanup copy_page output by hiding setup symbol A lot of hits in "setup" doesn't make much sense, so hide this symbol and allow all the hits to end up in copy_4k_page. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 75f3267..e68beac 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -26,11 +26,11 @@ BEGIN_FTR_SECTION srd r8,r5,r11 mtctr r8 -setup: +.Lsetup: dcbt r9,r4 dcbz r9,r3 add r9,r9,r12 - bdnz setup + bdnz .Lsetup END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ) addi r3,r3,-8 srdi r8,r5,7 /* page is copied in 128 byte strides */ -- cgit v0.10.2 From 917e407c762ba6d91d1a4bc1c804d518585082a3 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 01:24:29 +0000 Subject: powerpc: perf_event: Hide iseries_check_pending_irqs If CONFIG_PPC_ISERIES isn't defined we end up with iseries_check_pending_irqs and do_work at the same address. perf ends up picking iseries_check_pending_irqs which creates confusing backtraces. Hide it. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 900e0ee..e722226 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -551,7 +551,7 @@ restore: BEGIN_FW_FTR_SECTION ld r5,SOFTE(r1) FW_FTR_SECTION_ELSE - b iseries_check_pending_irqs + b .Liseries_check_pending_irqs ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); @@ -623,7 +623,7 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) #endif /* CONFIG_PPC_BOOK3E */ -iseries_check_pending_irqs: +.Liseries_check_pending_irqs: #ifdef CONFIG_PPC_ISERIES ld r5,SOFTE(r1) cmpdi 0,r5,0 -- cgit v0.10.2 From c86e2eaded39843e1bf4f07d1adfab4494f20894 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 18 Oct 2009 01:24:06 +0000 Subject: powerpc: perf_event: Cleanup output by adding symbols Add some dummy symbols for the branches at 0xf00, 0xf20 and 0xf40, otherwise hits end up in trap_0e which is confusing to the user. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 1808876..c7eb4e0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -185,12 +185,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) * prolog code of the PerformanceMonitor one. A little * trickery is thus necessary */ +performance_monitor_pSeries_1: . = 0xf00 b performance_monitor_pSeries +altivec_unavailable_pSeries_1: . = 0xf20 b altivec_unavailable_pSeries +vsx_unavailable_pSeries_1: . = 0xf40 b vsx_unavailable_pSeries -- cgit v0.10.2 From 024e1a49411a1a7363e65db48edf1b09e9ee68ad Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 27 Oct 2009 19:24:46 -0700 Subject: tomoyo: improve hash bucket dispersion When examining the network device name hash, it was discovered that the low order bits of full_name_hash() are not very well dispersed across the possible values. When used by filesystem code, this is handled by folding with the function hash_long(). The only other non-filesystem usage of full_name_hash() at this time appears to be in TOMOYO. This patch should fix that. I do not use TOMOYO at this time, so this patch is build tested only. Signed-off-by: Stephen Hemminger Acked-by: Tetsuo Handa Signed-off-by: James Morris diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 5f2e332..917f564 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -13,6 +13,8 @@ #include #include #include +#include + #include "common.h" #include "realpath.h" @@ -263,7 +265,8 @@ static unsigned int tomoyo_quota_for_savename; * table. Frequency of appending strings is very low. So we don't need * large (e.g. 64k) hash size. 256 will be sufficient. */ -#define TOMOYO_MAX_HASH 256 +#define TOMOYO_HASH_BITS 8 +#define TOMOYO_MAX_HASH (1u<entry.hash && !strcmp(name, ptr->entry.name)) goto out; } @@ -365,7 +370,7 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name) tomoyo_fill_path_info(&ptr->entry); fmb->ptr += len; fmb->len -= len; - list_add_tail(&ptr->list, &tomoyo_name_list[hash % TOMOYO_MAX_HASH]); + list_add_tail(&ptr->list, head); if (fmb->len == 0) { list_del(&fmb->list); kfree(fmb); -- cgit v0.10.2 From ff76ec18cabb12a6c8f3c65bd1d23f1a770fe908 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 28 Oct 2009 12:26:39 -0700 Subject: tpm: fix header for modular build Fix build for TCG_TPM=m. Header file doesn't handle this and incorrectly builds stubs. drivers/char/tpm/tpm.c:720: error: redefinition of 'tpm_pcr_read' include/linux/tpm.h:35: error:previous definition of 'tpm_pcr_read' was here drivers/char/tpm/tpm.c:752: error: redefinition of 'tpm_pcr_extend' include/linux/tpm.h:38: error:previous definition of 'tpm_pcr_extend' was here Repairs linux-next's commit d6ba452128178091dab7a04d54f7e66fdc32fb39 Author: Mimi Zohar Date: Mon Oct 26 09:26:18 2009 -0400 tpm add default function definitions Signed-off-by: Randy Dunlap Cc: Rajiv Andrade Cc: Mimi Zohar Cc: James Morris Cc: Eric Paris Signed-off-by: Andrew Morton Signed-off-by: James Morris diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 8eaa8f8..ac5d1c1 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -27,7 +27,7 @@ */ #define TPM_ANY_NUM 0xFFFF -#if defined(CONFIG_TCG_TPM) +#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); -- cgit v0.10.2 From 66bd8424cc05e800db384053bf7ab967e4658468 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 28 Oct 2009 21:51:21 -0200 Subject: perf tools: Delay loading symtabs till we hit a map with it So that we can have a quicker start on perf top and even speedups in the other tools, as we can have maps with no hits, so no need to load its symtabs. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256773881-4191-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6d63c2e..8688bfe 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -165,7 +165,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (map != NULL) { got_map: ip = map->map_ip(map, ip); - sym = map->dso->find_symbol(map->dso, ip); + sym = map__find_symbol(map, ip, symbol_filter); } else { /* * If this is outside of all known maps, @@ -203,7 +203,7 @@ static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { struct map *map = map__new(&event->mmap, NULL, 0, - sizeof(struct sym_priv), symbol_filter); + sizeof(struct sym_priv)); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b3d814b..f1bcd35 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -455,7 +455,7 @@ got_map: dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip); *ipp = ip; - return map ? map->dso->find_symbol(map->dso, ip) : NULL; + return map ? map__find_symbol(map, ip, NULL) : NULL; } static int call__match(struct symbol *sym) @@ -751,7 +751,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct map *map = map__new(&event->mmap, cwd, cwdlen, 0, NULL); + struct map *map = map__new(&event->mmap, cwd, cwdlen, 0); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a02fc41..ee87640 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -834,7 +834,7 @@ static void event__process_sample(const event_t *self, int counter) map = thread__find_map(thread, ip); if (map != NULL) { ip = map->map_ip(map, ip); - sym = map->dso->find_symbol(map->dso, ip); + sym = map__find_symbol(map, ip, symbol_filter); if (sym == NULL) return; userspace_samples++; @@ -879,8 +879,7 @@ static void event__process_mmap(event_t *self) if (thread != NULL) { struct map *map = map__new(&self->mmap, NULL, 0, - sizeof(struct sym_entry), - symbol_filter); + sizeof(struct sym_entry)); if (map != NULL) thread__insert_map(thread, map); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 2ae1177..3064a05 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -106,10 +106,11 @@ struct symbol; typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, - unsigned int sym_priv_size, symbol_filter_t filter); + unsigned int sym_priv_size); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); +struct symbol *map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter); int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); void event__synthesize_threads(int (*process)(event_t *event)); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index c1c5568..d302e51 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -21,7 +21,7 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) } struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, - unsigned int sym_priv_size, symbol_filter_t filter) + unsigned int sym_priv_size) { struct map *self = malloc(sizeof(*self)); @@ -29,7 +29,6 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, const char *filename = event->filename; char newfilename[PATH_MAX]; int anon; - bool new_dso; if (cwd) { int n = strcommon(filename, cwd, cwdlen); @@ -52,23 +51,10 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, self->end = event->start + event->len; self->pgoff = event->pgoff; - self->dso = dsos__findnew(filename, sym_priv_size, &new_dso); + self->dso = dsos__findnew(filename, sym_priv_size); if (self->dso == NULL) goto out_delete; - if (new_dso) { - int nr = dso__load(self->dso, self, filter); - - if (nr < 0) - pr_warning("Failed to open %s, continuing " - "without symbols\n", - self->dso->long_name); - else if (nr == 0) - pr_warning("No symbols found in %s, maybe " - "install a debug package?\n", - self->dso->long_name); - } - if (self->dso == vdso || anon) self->map_ip = self->unmap_ip = identity__map_ip; else { @@ -82,6 +68,26 @@ out_delete: return NULL; } +struct symbol * +map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter) +{ + if (!self->dso->loaded) { + int nr = dso__load(self->dso, self, filter); + + if (nr < 0) { + pr_warning("Failed to open %s, continuing without symbols\n", + self->dso->long_name); + return NULL; + } else if (nr == 0) { + pr_warning("No symbols found in %s, maybe install a debug package?\n", + self->dso->long_name); + return NULL; + } + } + + return self->dso->find_symbol(self->dso, ip); +} + struct map *map__clone(struct map *self) { struct map *map = malloc(sizeof(*self)); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8f0208c..0273d83 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -909,6 +909,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) int ret = -1; int fd; + self->loaded = true; + if (!name) return -1; @@ -1019,6 +1021,8 @@ static int dso__load_module_sym(struct dso *self, struct map *map, { int err = 0, fd = open(self->long_name, O_RDONLY); + self->loaded = true; + if (fd < 0) { pr_err("%s: cannot open %s\n", __func__, self->long_name); return err; @@ -1214,6 +1218,8 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, { int err, fd = open(vmlinux, O_RDONLY); + self->loaded = true; + if (fd < 0) return -1; @@ -1312,19 +1318,15 @@ static struct dso *dsos__find(const char *name) return NULL; } -struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size, - bool *is_new) +struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size) { struct dso *dso = dsos__find(name); if (!dso) { dso = dso__new(name, sym_priv_size); - if (dso) { + if (dso != NULL) dsos__add(dso); - *is_new = true; - } - } else - *is_new = false; + } return dso; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 77b7b3e..432edbc 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -46,6 +46,7 @@ struct dso { unsigned int sym_priv_size; unsigned char adjust_symbols; unsigned char slen_calculated; + bool loaded; unsigned char origin; const char *short_name; char *long_name; @@ -64,8 +65,7 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, symbol_filter_t filter, int modules); -struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size, - bool *is_new); +struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); void dsos__fprintf(FILE *fp); -- cgit v0.10.2 From 7f387d3f2421781610588faa2f49ae5f1737b137 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:04 -0400 Subject: x86: Fix SSE opcode map bug Fix superscripts position because some superscripts of SSE opcode are not put in correct position. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204204.30545.97296.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 701c467..efef3ca 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -401,9 +401,9 @@ Referrer: 2-byte escape 62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) 63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) 64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) -65: pcmpgtw Pq,Qq | pcmpgtw(66) Vdq,Wdq +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66) 66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) -67: packuswb Pq,Qq | packuswb(66) Vdq,Wdq +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66) 68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) 69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) 6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) @@ -425,8 +425,8 @@ Referrer: 2-byte escape 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: haddps(F2) Vps,Wps | haddpd(66) Vpd,Wpd -7d: hsubps(F2) Vps,Wps | hsubpd(66) Vpd,Wpd +7c: haddps Vps,Wps (F2) | haddpd Vpd,Wpd (66) +7d: hsubps Vps,Wps (F2) | hsubpd Vpd,Wpd (66) 7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) 7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) # 0x0f 0x80-0x8f @@ -574,7 +574,7 @@ Referrer: 3-byte escape 1 01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) 02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) 03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) -04: pmaddubsw Pq,Qq | pmaddubsw (66)Vdq,Wdq +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66) 05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) 06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) 07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) -- cgit v0.10.2 From 04d46c1b13b02e1e5c24eb270a01cf3f94ee4d04 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:11 -0400 Subject: x86: Merge INAT_REXPFX into INAT_PFX_* Merge INAT_REXPFX into INAT_PFX_* macro and rename it to INAT_PFX_REX. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204211.30545.58090.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index 2866fdd..c2487d2 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -30,10 +30,11 @@ #define INAT_OPCODE_TABLE_SIZE 256 #define INAT_GROUP_TABLE_SIZE 8 -/* Legacy instruction prefixes */ +/* Legacy last prefixes */ #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ #define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ #define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +/* Other Legacy prefixes */ #define INAT_PFX_LOCK 4 /* 0xF0 */ #define INAT_PFX_CS 5 /* 0x2E */ #define INAT_PFX_DS 6 /* 0x3E */ @@ -42,8 +43,11 @@ #define INAT_PFX_GS 9 /* 0x65 */ #define INAT_PFX_SS 10 /* 0x36 */ #define INAT_PFX_ADDRSZ 11 /* 0x67 */ +/* x86-64 REX prefix */ +#define INAT_PFX_REX 12 /* 0x4X */ -#define INAT_LPREFIX_MAX 3 +#define INAT_LSTPFX_MAX 3 +#define INAT_LGCPFX_MAX 11 /* Immediate size */ #define INAT_IMM_BYTE 1 @@ -75,12 +79,11 @@ #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) /* Flags */ #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) -#define INAT_REXPFX (1 << INAT_FLAG_OFFS) -#define INAT_MODRM (1 << (INAT_FLAG_OFFS + 1)) -#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 2)) -#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 3)) -#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 4)) -#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_MODRM (1 << (INAT_FLAG_OFFS)) +#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) +#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) +#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) +#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -97,9 +100,10 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_attr_t esc_attr); /* Attribute checking functions */ -static inline int inat_is_prefix(insn_attr_t attr) +static inline int inat_is_legacy_prefix(insn_attr_t attr) { - return attr & INAT_PFX_MASK; + attr &= INAT_PFX_MASK; + return attr && attr <= INAT_LGCPFX_MAX; } static inline int inat_is_address_size_prefix(insn_attr_t attr) @@ -112,9 +116,14 @@ static inline int inat_is_operand_size_prefix(insn_attr_t attr) return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; } +static inline int inat_is_rex_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_REX; +} + static inline int inat_last_prefix_id(insn_attr_t attr) { - if ((attr & INAT_PFX_MASK) > INAT_LPREFIX_MAX) + if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) return 0; else return attr & INAT_PFX_MASK; @@ -155,11 +164,6 @@ static inline int inat_immediate_size(insn_attr_t attr) return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; } -static inline int inat_is_rex_prefix(insn_attr_t attr) -{ - return attr & INAT_REXPFX; -} - static inline int inat_has_modrm(insn_attr_t attr) { return attr & INAT_MODRM; diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index dfd56a3..9f48317 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -69,7 +69,7 @@ void insn_get_prefixes(struct insn *insn) lb = 0; b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); - while (inat_is_prefix(attr)) { + while (inat_is_legacy_prefix(attr)) { /* Skip if same prefix */ for (i = 0; i < nb; i++) if (prefixes->bytes[i] == b) diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 19ba096..7d54929 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -278,7 +278,7 @@ function convert_operands(opnd, i,imm,mod) # check REX prefix if (match(opcode, rex_expr)) - flags = add_flags(flags, "INAT_REXPFX") + flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") # check coprocessor escape : TODO if (match(opcode, fpu_expr)) @@ -316,7 +316,7 @@ END { # print escape opcode map's array print "/* Escape opcode map array */" print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \ - "[INAT_LPREFIX_MAX + 1] = {" + "[INAT_LSTPFX_MAX + 1] = {" for (i = 0; i < geid; i++) for (j = 0; j < max_lprefix; j++) if (etable[i,j]) @@ -325,7 +325,7 @@ END { # print group opcode map's array print "/* Group opcode map array */" print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\ - "[INAT_LPREFIX_MAX + 1] = {" + "[INAT_LSTPFX_MAX + 1] = {" for (i = 0; i < ggid; i++) for (j = 0; j < max_lprefix; j++) if (gtable[i,j]) -- cgit v0.10.2 From 82cb57028c864822c5a260f806d051e2ce28c86a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:19 -0400 Subject: x86: Add pclmulq to x86 opcode map Add pclmulq opcode to x86 opcode map. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204219.30545.82039.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index efef3ca..1f41246 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -672,6 +672,7 @@ Referrer: 3-byte escape 2 40: dpps Vdq,Wdq,Ib (66) 41: dppd Vdq,Wdq,Ib (66) 42: mpsadbw Vdq,Wdq,Ib (66) +44: pclmulq Vdq,Wdq,Ib (66) 60: pcmpestrm Vdq,Wdq,Ib (66) 61: pcmpestri Vdq,Wdq,Ib (66) 62: pcmpistrm Vdq,Wdq,Ib (66) -- cgit v0.10.2 From e0e492e99b372c6990a5daca9e4683c341f1330e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:27 -0400 Subject: x86: AVX instruction set decoder support Add Intel AVX(Advanced Vector Extensions) instruction set support to x86 instruction decoder. This adds insn.vex_prefix field for storing VEX prefixes, and introduces some original tags for expressing opcodes attributes. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204226.30545.23451.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h index c2487d2..205b063 100644 --- a/arch/x86/include/asm/inat.h +++ b/arch/x86/include/asm/inat.h @@ -32,8 +32,8 @@ /* Legacy last prefixes */ #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ -#define INAT_PFX_REPNE 2 /* 0xF2 */ /* LPFX2 */ -#define INAT_PFX_REPE 3 /* 0xF3 */ /* LPFX3 */ +#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ +#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ /* Other Legacy prefixes */ #define INAT_PFX_LOCK 4 /* 0xF0 */ #define INAT_PFX_CS 5 /* 0x2E */ @@ -45,6 +45,9 @@ #define INAT_PFX_ADDRSZ 11 /* 0x67 */ /* x86-64 REX prefix */ #define INAT_PFX_REX 12 /* 0x4X */ +/* AVX VEX prefixes */ +#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */ +#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */ #define INAT_LSTPFX_MAX 3 #define INAT_LGCPFX_MAX 11 @@ -84,6 +87,8 @@ #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) +#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) +#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -98,6 +103,9 @@ extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, insn_attr_t esc_attr); +extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, + insn_byte_t vex_m, + insn_byte_t vex_pp); /* Attribute checking functions */ static inline int inat_is_legacy_prefix(insn_attr_t attr) @@ -129,6 +137,17 @@ static inline int inat_last_prefix_id(insn_attr_t attr) return attr & INAT_PFX_MASK; } +static inline int inat_is_vex_prefix(insn_attr_t attr) +{ + attr &= INAT_PFX_MASK; + return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; +} + +static inline int inat_is_vex3_prefix(insn_attr_t attr) +{ + return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; +} + static inline int inat_is_escape(insn_attr_t attr) { return attr & INAT_ESC_MASK; @@ -189,4 +208,13 @@ static inline int inat_has_variant(insn_attr_t attr) return attr & INAT_VARIANT; } +static inline int inat_accept_vex(insn_attr_t attr) +{ + return attr & INAT_VEXOK; +} + +static inline int inat_must_vex(insn_attr_t attr) +{ + return attr & INAT_VEXONLY; +} #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 12b4e37..96c2e0a 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -39,6 +39,7 @@ struct insn { * prefixes.bytes[3]: last prefix */ struct insn_field rex_prefix; /* REX prefix */ + struct insn_field vex_prefix; /* VEX prefix */ struct insn_field opcode; /* * opcode.bytes[0]: opcode1 * opcode.bytes[1]: opcode2 @@ -80,6 +81,19 @@ struct insn { #define X86_REX_X(rex) ((rex) & 2) #define X86_REX_B(rex) ((rex) & 1) +/* VEX bit flags */ +#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ +#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ +#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ +#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ +#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ +/* VEX bit fields */ +#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ +#define X86_VEX2_M 1 /* VEX2.M always 1 */ +#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ +#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ + /* The last prefix is needed for two-byte and three-byte opcodes */ static inline insn_byte_t insn_last_prefix(struct insn *insn) { @@ -114,15 +128,42 @@ static inline void kernel_insn_init(struct insn *insn, const void *kaddr) #endif } +static inline int insn_is_avx(struct insn *insn) +{ + if (!insn->prefixes.got) + insn_get_prefixes(insn); + return (insn->vex_prefix.value != 0); +} + +static inline insn_byte_t insn_vex_m_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX2_M; + else + return X86_VEX3_M(insn->vex_prefix.bytes[1]); +} + +static inline insn_byte_t insn_vex_p_bits(struct insn *insn) +{ + if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */ + return X86_VEX_P(insn->vex_prefix.bytes[1]); + else + return X86_VEX_P(insn->vex_prefix.bytes[2]); +} + /* Offset of each field from kaddr */ static inline int insn_offset_rex_prefix(struct insn *insn) { return insn->prefixes.nbytes; } -static inline int insn_offset_opcode(struct insn *insn) +static inline int insn_offset_vex_prefix(struct insn *insn) { return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; } +static inline int insn_offset_opcode(struct insn *insn) +{ + return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; +} static inline int insn_offset_modrm(struct insn *insn) { return insn_offset_opcode(insn) + insn->opcode.nbytes; diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c index 3fb5998..46fc4ee 100644 --- a/arch/x86/lib/inat.c +++ b/arch/x86/lib/inat.c @@ -76,3 +76,15 @@ insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx, inat_group_common_attribute(grp_attr); } +insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, + insn_byte_t vex_p) +{ + const insn_attr_t *table; + if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) + return 0; + table = inat_avx_tables[vex_m][vex_p]; + if (!table) + return 0; + return table[opcode]; +} + diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 9f48317..9f33b98 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -28,6 +28,9 @@ #define peek_next(t, insn) \ ({t r; r = *(t*)insn->next_byte; r; }) +#define peek_nbyte_next(t, insn, n) \ + ({t r; r = *(t*)((insn)->next_byte + n); r; }) + /** * insn_init() - initialize struct insn * @insn: &struct insn to be initialized @@ -107,6 +110,7 @@ found: insn->prefixes.bytes[3] = lb; } + /* Decode REX prefix */ if (insn->x86_64) { b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); @@ -120,6 +124,39 @@ found: } } insn->rex_prefix.got = 1; + + /* Decode VEX prefix */ + b = peek_next(insn_byte_t, insn); + attr = inat_get_opcode_attribute(b); + if (inat_is_vex_prefix(attr)) { + insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); + if (!insn->x86_64) { + /* + * In 32-bits mode, if the [7:6] bits (mod bits of + * ModRM) on the second byte are not 11b, it is + * LDS or LES. + */ + if (X86_MODRM_MOD(b2) != 3) + goto vex_end; + } + insn->vex_prefix.bytes[0] = b; + insn->vex_prefix.bytes[1] = b2; + if (inat_is_vex3_prefix(attr)) { + b2 = peek_nbyte_next(insn_byte_t, insn, 2); + insn->vex_prefix.bytes[2] = b2; + insn->vex_prefix.nbytes = 3; + insn->next_byte += 3; + if (insn->x86_64 && X86_VEX_W(b2)) + /* VEX.W overrides opnd_size */ + insn->opnd_bytes = 8; + } else { + insn->vex_prefix.nbytes = 2; + insn->next_byte += 2; + } + } +vex_end: + insn->vex_prefix.got = 1; + prefixes->got = 1; return; } @@ -147,6 +184,18 @@ void insn_get_opcode(struct insn *insn) op = get_next(insn_byte_t, insn); opcode->bytes[0] = op; opcode->nbytes = 1; + + /* Check if there is VEX prefix or not */ + if (insn_is_avx(insn)) { + insn_byte_t m, p; + m = insn_vex_m_bits(insn); + p = insn_vex_p_bits(insn); + insn->attr = inat_get_avx_attribute(op, m, p); + if (!inat_accept_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ + goto end; /* VEX has only 1 byte for opcode */ + } + insn->attr = inat_get_opcode_attribute(op); while (inat_is_escape(insn->attr)) { /* Get escaped opcode */ @@ -155,6 +204,9 @@ void insn_get_opcode(struct insn *insn) pfx = insn_last_prefix(insn); insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); } + if (inat_must_vex(insn->attr)) + insn->attr = 0; /* This instruction is bad */ +end: opcode->got = 1; } diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 1f41246..9887bfe 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -3,6 +3,7 @@ # # Table: table-name # Referrer: escaped-name +# AVXcode: avx-code # opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # (or) # opcode: escape # escaped-name @@ -13,9 +14,16 @@ # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] # EndTable # +# AVX Superscripts +# (VEX): this opcode can accept VEX prefix. +# (oVEX): this opcode requires VEX prefix. +# (o128): this opcode only supports 128bit VEX. +# (o256): this opcode only supports 256bit VEX. +# Table: one byte opcode Referrer: +AVXcode: # 0x00 - 0x0f 00: ADD Eb,Gb 01: ADD Ev,Gv @@ -225,8 +233,8 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) -c5: LDS Gz,Mp (i64) +c4: LES Gz,Mp (i64) | 3bytes-VEX (Prefix) +c5: LDS Gz,Mp (i64) | 2bytes-VEX (Prefix) c6: Grp11 Eb,Ib (1A) c7: Grp11 Ev,Iz (1A) c8: ENTER Iw,Ib @@ -290,8 +298,9 @@ fe: Grp4 (1A) ff: Grp5 (1A) EndTable -Table: 2-byte opcode # First Byte is 0x0f +Table: 2-byte opcode (0x0f) Referrer: 2-byte escape +AVXcode: 1 # 0x0f 0x00-0x0f 00: Grp6 (1A) 01: Grp7 (1A) @@ -311,14 +320,14 @@ Referrer: 2-byte escape # 3DNow! uses the last imm byte as opcode extension. 0f: 3DNow! Pq,Qq,Ib # 0x0f 0x10-0x1f -10: movups Vps,Wps | movss Vss,Wss (F3) | movupd Vpd,Wpd (66) | movsd Vsd,Wsd (F2) -11: movups Wps,Vps | movss Wss,Vss (F3) | movupd Wpd,Vpd (66) | movsd Wsd,Vsd (F2) -12: movlps Vq,Mq | movlpd Vq,Mq (66) | movhlps Vq,Uq | movddup Vq,Wq (F2) | movsldup Vq,Wq (F3) -13: mpvlps Mq,Vq | movlpd Mq,Vq (66) -14: unpcklps Vps,Wq | unpcklpd Vpd,Wq (66) -15: unpckhps Vps,Wq | unpckhpd Vpd,Wq (66) -16: movhps Vq,Mq | movhpd Vq,Mq (66) | movlsps Vq,Uq | movshdup Vq,Wq (F3) -17: movhps Mq,Vq | movhpd Mq,Vq (66) +10: movups Vps,Wps (VEX) | movss Vss,Wss (F3),(VEX),(o128) | movupd Vpd,Wpd (66),(VEX) | movsd Vsd,Wsd (F2),(VEX),(o128) +11: movups Wps,Vps (VEX) | movss Wss,Vss (F3),(VEX),(o128) | movupd Wpd,Vpd (66),(VEX) | movsd Wsd,Vsd (F2),(VEX),(o128) +12: movlps Vq,Mq (VEX),(o128) | movlpd Vq,Mq (66),(VEX),(o128) | movhlps Vq,Uq (VEX),(o128) | movddup Vq,Wq (F2),(VEX) | movsldup Vq,Wq (F3),(VEX) +13: mpvlps Mq,Vq (VEX),(o128) | movlpd Mq,Vq (66),(VEX),(o128) +14: unpcklps Vps,Wq (VEX) | unpcklpd Vpd,Wq (66),(VEX) +15: unpckhps Vps,Wq (VEX) | unpckhpd Vpd,Wq (66),(VEX) +16: movhps Vq,Mq (VEX),(o128) | movhpd Vq,Mq (66),(VEX),(o128) | movlsps Vq,Uq (VEX),(o128) | movshdup Vq,Wq (F3),(VEX) +17: movhps Mq,Vq (VEX),(o128) | movhpd Mq,Vq (66),(VEX),(o128) 18: Grp16 (1A) 19: 1a: @@ -336,14 +345,14 @@ Referrer: 2-byte escape 25: 26: 27: -28: movaps Vps,Wps | movapd Vpd,Wpd (66) -29: movaps Wps,Vps | movapd Wpd,Vpd (66) -2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2) -2b: movntps Mps,Vps | movntpd Mpd,Vpd (66) -2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2) -2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2) -2e: ucomiss Vss,Wss | ucomisd Vsd,Wsd (66) -2f: comiss Vss,Wss | comisd Vsd,Wsd (66) +28: movaps Vps,Wps (VEX) | movapd Vpd,Wpd (66),(VEX) +29: movaps Wps,Vps (VEX) | movapd Wpd,Vpd (66),(VEX) +2a: cvtpi2ps Vps,Qpi | cvtsi2ss Vss,Ed/q (F3),(VEX),(o128) | cvtpi2pd Vpd,Qpi (66) | cvtsi2sd Vsd,Ed/q (F2),(VEX),(o128) +2b: movntps Mps,Vps (VEX) | movntpd Mpd,Vpd (66),(VEX) +2c: cvttps2pi Ppi,Wps | cvttss2si Gd/q,Wss (F3),(VEX),(o128) | cvttpd2pi Ppi,Wpd (66) | cvttsd2si Gd/q,Wsd (F2),(VEX),(o128) +2d: cvtps2pi Ppi,Wps | cvtss2si Gd/q,Wss (F3),(VEX),(o128) | cvtpd2pi Qpi,Wpd (66) | cvtsd2si Gd/q,Wsd (F2),(VEX),(o128) +2e: ucomiss Vss,Wss (VEX),(o128) | ucomisd Vsd,Wsd (66),(VEX),(o128) +2f: comiss Vss,Wss (VEX),(o128) | comisd Vsd,Wsd (66),(VEX),(o128) # 0x0f 0x30-0x3f 30: WRMSR 31: RDTSC @@ -379,56 +388,56 @@ Referrer: 2-byte escape 4e: CMOVLE/NG Gv,Ev 4f: CMOVNLE/G Gv,Ev # 0x0f 0x50-0x5f -50: movmskps Gd/q,Ups | movmskpd Gd/q,Upd (66) -51: sqrtps Vps,Wps | sqrtss Vss,Wss (F3) | sqrtpd Vpd,Wpd (66) | sqrtsd Vsd,Wsd (F2) -52: rsqrtps Vps,Wps | rsqrtss Vss,Wss (F3) -53: rcpps Vps,Wps | rcpss Vss,Wss (F3) -54: andps Vps,Wps | andpd Vpd,Wpd (66) -55: andnps Vps,Wps | andnpd Vpd,Wpd (66) -56: orps Vps,Wps | orpd Vpd,Wpd (66) -57: xorps Vps,Wps | xorpd Vpd,Wpd (66) -58: addps Vps,Wps | addss Vss,Wss (F3) | addpd Vpd,Wpd (66) | addsd Vsd,Wsd (F2) -59: mulps Vps,Wps | mulss Vss,Wss (F3) | mulpd Vpd,Wpd (66) | mulsd Vsd,Wsd (F2) -5a: cvtps2pd Vpd,Wps | cvtss2sd Vsd,Wss (F3) | cvtpd2ps Vps,Wpd (66) | cvtsd2ss Vsd,Wsd (F2) -5b: cvtdq2ps Vps,Wdq | cvtps2dq Vdq,Wps (66) | cvttps2dq Vdq,Wps (F3) -5c: subps Vps,Wps | subss Vss,Wss (F3) | subpd Vpd,Wpd (66) | subsd Vsd,Wsd (F2) -5d: minps Vps,Wps | minss Vss,Wss (F3) | minpd Vpd,Wpd (66) | minsd Vsd,Wsd (F2) -5e: divps Vps,Wps | divss Vss,Wss (F3) | divpd Vpd,Wpd (66) | divsd Vsd,Wsd (F2) -5f: maxps Vps,Wps | maxss Vss,Wss (F3) | maxpd Vpd,Wpd (66) | maxsd Vsd,Wsd (F2) +50: movmskps Gd/q,Ups (VEX) | movmskpd Gd/q,Upd (66),(VEX) +51: sqrtps Vps,Wps (VEX) | sqrtss Vss,Wss (F3),(VEX),(o128) | sqrtpd Vpd,Wpd (66),(VEX) | sqrtsd Vsd,Wsd (F2),(VEX),(o128) +52: rsqrtps Vps,Wps (VEX) | rsqrtss Vss,Wss (F3),(VEX),(o128) +53: rcpps Vps,Wps (VEX) | rcpss Vss,Wss (F3),(VEX),(o128) +54: andps Vps,Wps (VEX) | andpd Vpd,Wpd (66),(VEX) +55: andnps Vps,Wps (VEX) | andnpd Vpd,Wpd (66),(VEX) +56: orps Vps,Wps (VEX) | orpd Vpd,Wpd (66),(VEX) +57: xorps Vps,Wps (VEX) | xorpd Vpd,Wpd (66),(VEX) +58: addps Vps,Wps (VEX) | addss Vss,Wss (F3),(VEX),(o128) | addpd Vpd,Wpd (66),(VEX) | addsd Vsd,Wsd (F2),(VEX),(o128) +59: mulps Vps,Wps (VEX) | mulss Vss,Wss (F3),(VEX),(o128) | mulpd Vpd,Wpd (66),(VEX) | mulsd Vsd,Wsd (F2),(VEX),(o128) +5a: cvtps2pd Vpd,Wps (VEX) | cvtss2sd Vsd,Wss (F3),(VEX),(o128) | cvtpd2ps Vps,Wpd (66),(VEX) | cvtsd2ss Vsd,Wsd (F2),(VEX),(o128) +5b: cvtdq2ps Vps,Wdq (VEX) | cvtps2dq Vdq,Wps (66),(VEX) | cvttps2dq Vdq,Wps (F3),(VEX) +5c: subps Vps,Wps (VEX) | subss Vss,Wss (F3),(VEX),(o128) | subpd Vpd,Wpd (66),(VEX) | subsd Vsd,Wsd (F2),(VEX),(o128) +5d: minps Vps,Wps (VEX) | minss Vss,Wss (F3),(VEX),(o128) | minpd Vpd,Wpd (66),(VEX) | minsd Vsd,Wsd (F2),(VEX),(o128) +5e: divps Vps,Wps (VEX) | divss Vss,Wss (F3),(VEX),(o128) | divpd Vpd,Wpd (66),(VEX) | divsd Vsd,Wsd (F2),(VEX),(o128) +5f: maxps Vps,Wps (VEX) | maxss Vss,Wss (F3),(VEX),(o128) | maxpd Vpd,Wpd (66),(VEX) | maxsd Vsd,Wsd (F2),(VEX),(o128) # 0x0f 0x60-0x6f -60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66) -61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66) -62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66) -63: packsswb Pq,Qq | packsswb Vdq,Wdq (66) -64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66) -65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66) -66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66) -67: packuswb Pq,Qq | packuswb Vdq,Wdq (66) -68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66) -69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66) -6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66) -6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66) -6c: punpcklqdq Vdq,Wdq (66) -6d: punpckhqdq Vdq,Wdq (66) -6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66) -6f: movq Pq,Qq | movdqa Vdq,Wdq (66) | movdqu Vdq,Wdq (F3) +60: punpcklbw Pq,Qd | punpcklbw Vdq,Wdq (66),(VEX),(o128) +61: punpcklwd Pq,Qd | punpcklwd Vdq,Wdq (66),(VEX),(o128) +62: punpckldq Pq,Qd | punpckldq Vdq,Wdq (66),(VEX),(o128) +63: packsswb Pq,Qq | packsswb Vdq,Wdq (66),(VEX),(o128) +64: pcmpgtb Pq,Qq | pcmpgtb Vdq,Wdq (66),(VEX),(o128) +65: pcmpgtw Pq,Qq | pcmpgtw Vdq,Wdq (66),(VEX),(o128) +66: pcmpgtd Pq,Qq | pcmpgtd Vdq,Wdq (66),(VEX),(o128) +67: packuswb Pq,Qq | packuswb Vdq,Wdq (66),(VEX),(o128) +68: punpckhbw Pq,Qd | punpckhbw Vdq,Wdq (66),(VEX),(o128) +69: punpckhwd Pq,Qd | punpckhwd Vdq,Wdq (66),(VEX),(o128) +6a: punpckhdq Pq,Qd | punpckhdq Vdq,Wdq (66),(VEX),(o128) +6b: packssdw Pq,Qd | packssdw Vdq,Wdq (66),(VEX),(o128) +6c: punpcklqdq Vdq,Wdq (66),(VEX),(o128) +6d: punpckhqdq Vdq,Wdq (66),(VEX),(o128) +6e: movd/q/ Pd,Ed/q | movd/q Vdq,Ed/q (66),(VEX),(o128) +6f: movq Pq,Qq | movdqa Vdq,Wdq (66),(VEX) | movdqu Vdq,Wdq (F3),(VEX) # 0x0f 0x70-0x7f -70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66) | pshufhw Vdq,Wdq,Ib (F3) | pshuflw VdqWdq,Ib (F2) +70: pshufw Pq,Qq,Ib | pshufd Vdq,Wdq,Ib (66),(VEX),(o128) | pshufhw Vdq,Wdq,Ib (F3),(VEX),(o128) | pshuflw VdqWdq,Ib (F2),(VEX),(o128) 71: Grp12 (1A) 72: Grp13 (1A) 73: Grp14 (1A) -74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66) -75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66) -76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66) -77: emms +74: pcmpeqb Pq,Qq | pcmpeqb Vdq,Wdq (66),(VEX),(o128) +75: pcmpeqw Pq,Qq | pcmpeqw Vdq,Wdq (66),(VEX),(o128) +76: pcmpeqd Pq,Qq | pcmpeqd Vdq,Wdq (66),(VEX),(o128) +77: emms/vzeroupper/vzeroall (VEX) 78: VMREAD Ed/q,Gd/q 79: VMWRITE Gd/q,Ed/q 7a: 7b: -7c: haddps Vps,Wps (F2) | haddpd Vpd,Wpd (66) -7d: hsubps Vps,Wps (F2) | hsubpd Vpd,Wpd (66) -7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66) | movq Vq,Wq (F3) -7f: movq Qq,Pq | movdqa Wdq,Vdq (66) | movdqu Wdq,Vdq (F3) +7c: haddps Vps,Wps (F2),(VEX) | haddpd Vpd,Wpd (66),(VEX) +7d: hsubps Vps,Wps (F2),(VEX) | hsubpd Vpd,Wpd (66),(VEX) +7e: movd/q Ed/q,Pd | movd/q Ed/q,Vdq (66),(VEX),(o128) | movq Vq,Wq (F3),(VEX),(o128) +7f: movq Qq,Pq | movdqa Wdq,Vdq (66),(VEX) | movdqu Wdq,Vdq (F3),(VEX) # 0x0f 0x80-0x8f 80: JO Jz (f64) 81: JNO Jz (f64) @@ -500,11 +509,11 @@ bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf c0: XADD Eb,Gb c1: XADD Ev,Gv -c2: cmpps Vps,Wps,Ib | cmpss Vss,Wss,Ib (F3) | cmppd Vpd,Wpd,Ib (66) | cmpsd Vsd,Wsd,Ib (F2) +c2: cmpps Vps,Wps,Ib (VEX) | cmpss Vss,Wss,Ib (F3),(VEX),(o128) | cmppd Vpd,Wpd,Ib (66),(VEX) | cmpsd Vsd,Wsd,Ib (F2),(VEX) c3: movnti Md/q,Gd/q -c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66) -c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66) -c6: shufps Vps,Wps,Ib | shufpd Vpd,Wpd,Ib (66) +c4: pinsrw Pq,Rd/q/Mw,Ib | pinsrw Vdq,Rd/q/Mw,Ib (66),(VEX),(o128) +c5: pextrw Gd,Nq,Ib | pextrw Gd,Udq,Ib (66),(VEX),(o128) +c6: shufps Vps,Wps,Ib (VEX) | shufpd Vpd,Wpd,Ib (66),(VEX) c7: Grp9 (1A) c8: BSWAP RAX/EAX/R8/R8D c9: BSWAP RCX/ECX/R9/R9D @@ -515,77 +524,78 @@ cd: BSWAP RBP/EBP/R13/R13D ce: BSWAP RSI/ESI/R14/R14D cf: BSWAP RDI/EDI/R15/R15D # 0x0f 0xd0-0xdf -d0: addsubps Vps,Wps (F2) | addsubpd Vpd,Wpd (66) -d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66) -d2: psrld Pq,Qq | psrld Vdq,Wdq (66) -d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66) -d4: paddq Pq,Qq | paddq Vdq,Wdq (66) -d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66) -d6: movq Wq,Vq (66) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) -d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66) -d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66) -d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66) -da: pminub Pq,Qq | pminub Vdq,Wdq (66) -db: pand Pq,Qq | pand Vdq,Wdq (66) -dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66) -dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66) -de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66) -df: pandn Pq,Qq | pandn Vdq,Wdq (66) +d0: addsubps Vps,Wps (F2),(VEX) | addsubpd Vpd,Wpd (66),(VEX) +d1: psrlw Pq,Qq | psrlw Vdq,Wdq (66),(VEX),(o128) +d2: psrld Pq,Qq | psrld Vdq,Wdq (66),(VEX),(o128) +d3: psrlq Pq,Qq | psrlq Vdq,Wdq (66),(VEX),(o128) +d4: paddq Pq,Qq | paddq Vdq,Wdq (66),(VEX),(o128) +d5: pmullw Pq,Qq | pmullw Vdq,Wdq (66),(VEX),(o128) +d6: movq Wq,Vq (66),(VEX),(o128) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) +d7: pmovmskb Gd,Nq | pmovmskb Gd,Udq (66),(VEX),(o128) +d8: psubusb Pq,Qq | psubusb Vdq,Wdq (66),(VEX),(o128) +d9: psubusw Pq,Qq | psubusw Vdq,Wdq (66),(VEX),(o128) +da: pminub Pq,Qq | pminub Vdq,Wdq (66),(VEX),(o128) +db: pand Pq,Qq | pand Vdq,Wdq (66),(VEX),(o128) +dc: paddusb Pq,Qq | paddusb Vdq,Wdq (66),(VEX),(o128) +dd: paddusw Pq,Qq | paddusw Vdq,Wdq (66),(VEX),(o128) +de: pmaxub Pq,Qq | pmaxub Vdq,Wdq (66),(VEX),(o128) +df: pandn Pq,Qq | pandn Vdq,Wdq (66),(VEX),(o128) # 0x0f 0xe0-0xef -e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66) -e1: psraw Pq,Qq | psraw Vdq,Wdq (66) -e2: psrad Pq,Qq | psrad Vdq,Wdq (66) -e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66) -e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66) -e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66) -e6: cvtpd2dq Vdq,Wpd (F2) | cvttpd2dq Vdq,Wpd (66) | cvtdq2pd Vpd,Wdq (F3) -e7: movntq Mq,Pq | movntdq Mdq,Vdq (66) -e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66) -e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66) -ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66) -eb: por Pq,Qq | por Vdq,Wdq (66) -ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66) -ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66) -ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66) -ef: pxor Pq,Qq | pxor Vdq,Wdq (66) +e0: pavgb Pq,Qq | pavgb Vdq,Wdq (66),(VEX),(o128) +e1: psraw Pq,Qq | psraw Vdq,Wdq (66),(VEX),(o128) +e2: psrad Pq,Qq | psrad Vdq,Wdq (66),(VEX),(o128) +e3: pavgw Pq,Qq | pavgw Vdq,Wdq (66),(VEX),(o128) +e4: pmulhuw Pq,Qq | pmulhuw Vdq,Wdq (66),(VEX),(o128) +e5: pmulhw Pq,Qq | pmulhw Vdq,Wdq (66),(VEX),(o128) +e6: cvtpd2dq Vdq,Wpd (F2),(VEX) | cvttpd2dq Vdq,Wpd (66),(VEX) | cvtdq2pd Vpd,Wdq (F3),(VEX) +e7: movntq Mq,Pq | movntdq Mdq,Vdq (66),(VEX) +e8: psubsb Pq,Qq | psubsb Vdq,Wdq (66),(VEX),(o128) +e9: psubsw Pq,Qq | psubsw Vdq,Wdq (66),(VEX),(o128) +ea: pminsw Pq,Qq | pminsw Vdq,Wdq (66),(VEX),(o128) +eb: por Pq,Qq | por Vdq,Wdq (66),(VEX),(o128) +ec: paddsb Pq,Qq | paddsb Vdq,Wdq (66),(VEX),(o128) +ed: paddsw Pq,Qq | paddsw Vdq,Wdq (66),(VEX),(o128) +ee: pmaxsw Pq,Qq | pmaxsw Vdq,Wdq (66),(VEX),(o128) +ef: pxor Pq,Qq | pxor Vdq,Wdq (66),(VEX),(o128) # 0x0f 0xf0-0xff -f0: lddqu Vdq,Mdq (F2) -f1: psllw Pq,Qq | psllw Vdq,Wdq (66) -f2: pslld Pq,Qq | pslld Vdq,Wdq (66) -f3: psllq Pq,Qq | psllq Vdq,Wdq (66) -f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66) -f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66) -f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66) -f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66) -f8: psubb Pq,Qq | psubb Vdq,Wdq (66) -f9: psubw Pq,Qq | psubw Vdq,Wdq (66) -fa: psubd Pq,Qq | psubd Vdq,Wdq (66) -fb: psubq Pq,Qq | psubq Vdq,Wdq (66) -fc: paddb Pq,Qq | paddb Vdq,Wdq (66) -fd: paddw Pq,Qq | paddw Vdq,Wdq (66) -fe: paddd Pq,Qq | paddd Vdq,Wdq (66) +f0: lddqu Vdq,Mdq (F2),(VEX) +f1: psllw Pq,Qq | psllw Vdq,Wdq (66),(VEX),(o128) +f2: pslld Pq,Qq | pslld Vdq,Wdq (66),(VEX),(o128) +f3: psllq Pq,Qq | psllq Vdq,Wdq (66),(VEX),(o128) +f4: pmuludq Pq,Qq | pmuludq Vdq,Wdq (66),(VEX),(o128) +f5: pmaddwd Pq,Qq | pmaddwd Vdq,Wdq (66),(VEX),(o128) +f6: psadbw Pq,Qq | psadbw Vdq,Wdq (66),(VEX),(o128) +f7: maskmovq Pq,Nq | maskmovdqu Vdq,Udq (66),(VEX),(o128) +f8: psubb Pq,Qq | psubb Vdq,Wdq (66),(VEX),(o128) +f9: psubw Pq,Qq | psubw Vdq,Wdq (66),(VEX),(o128) +fa: psubd Pq,Qq | psubd Vdq,Wdq (66),(VEX),(o128) +fb: psubq Pq,Qq | psubq Vdq,Wdq (66),(VEX),(o128) +fc: paddb Pq,Qq | paddb Vdq,Wdq (66),(VEX),(o128) +fd: paddw Pq,Qq | paddw Vdq,Wdq (66),(VEX),(o128) +fe: paddd Pq,Qq | paddd Vdq,Wdq (66),(VEX),(o128) ff: EndTable Table: 3-byte opcode 1 (0x0f 0x38) Referrer: 3-byte escape 1 +AVXcode: 2 # 0x0f 0x38 0x00-0x0f -00: pshufb Pq,Qq | pshufb Vdq,Wdq (66) -01: phaddw Pq,Qq | phaddw Vdq,Wdq (66) -02: phaddd Pq,Qq | phaddd Vdq,Wdq (66) -03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66) -04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66) -05: phsubw Pq,Qq | phsubw Vdq,Wdq (66) -06: phsubd Pq,Qq | phsubd Vdq,Wdq (66) -07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66) -08: psignb Pq,Qq | psignb Vdq,Wdq (66) -09: psignw Pq,Qq | psignw Vdq,Wdq (66) -0a: psignd Pq,Qq | psignd Vdq,Wdq (66) -0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66) -0c: -0d: -0e: -0f: +00: pshufb Pq,Qq | pshufb Vdq,Wdq (66),(VEX),(o128) +01: phaddw Pq,Qq | phaddw Vdq,Wdq (66),(VEX),(o128) +02: phaddd Pq,Qq | phaddd Vdq,Wdq (66),(VEX),(o128) +03: phaddsw Pq,Qq | phaddsw Vdq,Wdq (66),(VEX),(o128) +04: pmaddubsw Pq,Qq | pmaddubsw Vdq,Wdq (66),(VEX),(o128) +05: phsubw Pq,Qq | phsubw Vdq,Wdq (66),(VEX),(o128) +06: phsubd Pq,Qq | phsubd Vdq,Wdq (66),(VEX),(o128) +07: phsubsw Pq,Qq | phsubsw Vdq,Wdq (66),(VEX),(o128) +08: psignb Pq,Qq | psignb Vdq,Wdq (66),(VEX),(o128) +09: psignw Pq,Qq | psignw Vdq,Wdq (66),(VEX),(o128) +0a: psignd Pq,Qq | psignd Vdq,Wdq (66),(VEX),(o128) +0b: pmulhrsw Pq,Qq | pmulhrsw Vdq,Wdq (66),(VEX),(o128) +0c: Vpermilps /r (66),(oVEX) +0d: Vpermilpd /r (66),(oVEX) +0e: vtestps /r (66),(oVEX) +0f: vtestpd /r (66),(oVEX) # 0x0f 0x38 0x10-0x1f 10: pblendvb Vdq,Wdq (66) 11: @@ -594,90 +604,99 @@ Referrer: 3-byte escape 1 14: blendvps Vdq,Wdq (66) 15: blendvpd Vdq,Wdq (66) 16: -17: ptest Vdq,Wdq (66) -18: -19: -1a: +17: ptest Vdq,Wdq (66),(VEX) +18: vbroadcastss /r (66),(oVEX) +19: vbroadcastsd /r (66),(oVEX),(o256) +1a: vbroadcastf128 /r (66),(oVEX),(o256) 1b: -1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66) -1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66) -1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66) +1c: pabsb Pq,Qq | pabsb Vdq,Wdq (66),(VEX),(o128) +1d: pabsw Pq,Qq | pabsw Vdq,Wdq (66),(VEX),(o128) +1e: pabsd Pq,Qq | pabsd Vdq,Wdq (66),(VEX),(o128) 1f: # 0x0f 0x38 0x20-0x2f -20: pmovsxbw Vdq,Udq/Mq (66) -21: pmovsxbd Vdq,Udq/Md (66) -22: pmovsxbq Vdq,Udq/Mw (66) -23: pmovsxwd Vdq,Udq/Mq (66) -24: pmovsxwq Vdq,Udq/Md (66) -25: pmovsxdq Vdq,Udq/Mq (66) +20: pmovsxbw Vdq,Udq/Mq (66),(VEX),(o128) +21: pmovsxbd Vdq,Udq/Md (66),(VEX),(o128) +22: pmovsxbq Vdq,Udq/Mw (66),(VEX),(o128) +23: pmovsxwd Vdq,Udq/Mq (66),(VEX),(o128) +24: pmovsxwq Vdq,Udq/Md (66),(VEX),(o128) +25: pmovsxdq Vdq,Udq/Mq (66),(VEX),(o128) 26: 27: -28: pmuldq Vdq,Wdq (66) -29: pcmpeqq Vdq,Wdq (66) -2a: movntdqa Vdq,Mdq (66) -2b: packusdw Vdq,Wdq (66) -2c: -2d: -2e: -2f: +28: pmuldq Vdq,Wdq (66),(VEX),(o128) +29: pcmpeqq Vdq,Wdq (66),(VEX),(o128) +2a: movntdqa Vdq,Mdq (66),(VEX),(o128) +2b: packusdw Vdq,Wdq (66),(VEX),(o128) +2c: vmaskmovps(ld) /r (66),(oVEX) +2d: vmaskmovpd(ld) /r (66),(oVEX) +2e: vmaskmovps(st) /r (66),(oVEX) +2f: vmaskmovpd(st) /r (66),(oVEX) # 0x0f 0x38 0x30-0x3f -30: pmovzxbw Vdq,Udq/Mq (66) -31: pmovzxbd Vdq,Udq/Md (66) -32: pmovzxbq Vdq,Udq/Mw (66) -33: pmovzxwd Vdq,Udq/Mq (66) -34: pmovzxwq Vdq,Udq/Md (66) -35: pmovzxdq Vdq,Udq/Mq (66) +30: pmovzxbw Vdq,Udq/Mq (66),(VEX),(o128) +31: pmovzxbd Vdq,Udq/Md (66),(VEX),(o128) +32: pmovzxbq Vdq,Udq/Mw (66),(VEX),(o128) +33: pmovzxwd Vdq,Udq/Mq (66),(VEX),(o128) +34: pmovzxwq Vdq,Udq/Md (66),(VEX),(o128) +35: pmovzxdq Vdq,Udq/Mq (66),(VEX),(o128) 36: -37: pcmpgtq Vdq,Wdq (66) -38: pminsb Vdq,Wdq (66) -39: pminsd Vdq,Wdq (66) -3a: pminuw Vdq,Wdq (66) -3b: pminud Vdq,Wdq (66) -3c: pmaxsb Vdq,Wdq (66) -3d: pmaxsd Vdq,Wdq (66) -3e: pmaxuw Vdq,Wdq (66) -3f: pmaxud Vdq,Wdq (66) +37: pcmpgtq Vdq,Wdq (66),(VEX),(o128) +38: pminsb Vdq,Wdq (66),(VEX),(o128) +39: pminsd Vdq,Wdq (66),(VEX),(o128) +3a: pminuw Vdq,Wdq (66),(VEX),(o128) +3b: pminud Vdq,Wdq (66),(VEX),(o128) +3c: pmaxsb Vdq,Wdq (66),(VEX),(o128) +3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) +3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) +3f: pmaxud Vdq,Wdq (66),(VEX),(o128) # 0x0f 0x38 0x4f-0xff -40: pmulld Vdq,Wdq (66) -41: phminposuw Vdq,Wdq (66) +40: pmulld Vdq,Wdq (66),(VEX),(o128) +41: phminposuw Vdq,Wdq (66),(VEX),(o128) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) -db: aesimc Vdq,Wdq (66) -dc: aesenc Vdq,Wdq (66) -dd: aesenclast Vdq,Wdq (66) -de: aesdec Vdq,Wdq (66) -df: aesdeclast Vdq,Wdq (66) +db: aesimc Vdq,Wdq (66),(VEX),(o128) +dc: aesenc Vdq,Wdq (66),(VEX),(o128) +dd: aesenclast Vdq,Wdq (66),(VEX),(o128) +de: aesdec Vdq,Wdq (66),(VEX),(o128) +df: aesdeclast Vdq,Wdq (66),(VEX),(o128) f0: MOVBE Gv,Mv | CRC32 Gd,Eb (F2) f1: MOVBE Mv,Gv | CRC32 Gd,Ev (F2) EndTable Table: 3-byte opcode 2 (0x0f 0x3a) Referrer: 3-byte escape 2 +AVXcode: 3 # 0x0f 0x3a 0x00-0xff -08: roundps Vdq,Wdq,Ib (66) -09: roundpd Vdq,Wdq,Ib (66) -0a: roundss Vss,Wss,Ib (66) -0b: roundsd Vsd,Wsd,Ib (66) -0c: blendps Vdq,Wdq,Ib (66) -0d: blendpd Vdq,Wdq,Ib (66) -0e: pblendw Vdq,Wdq,Ib (66) -0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66) -14: pextrb Rd/Mb,Vdq,Ib (66) -15: pextrw Rd/Mw,Vdq,Ib (66) -16: pextrd/pextrq Ed/q,Vdq,Ib (66) -17: extractps Ed,Vdq,Ib (66) -20: pinsrb Vdq,Rd/q/Mb,Ib (66) -21: insertps Vdq,Udq/Md,Ib (66) -22: pinsrd/pinsrq Vdq,Ed/q,Ib (66) -40: dpps Vdq,Wdq,Ib (66) -41: dppd Vdq,Wdq,Ib (66) -42: mpsadbw Vdq,Wdq,Ib (66) -44: pclmulq Vdq,Wdq,Ib (66) -60: pcmpestrm Vdq,Wdq,Ib (66) -61: pcmpestri Vdq,Wdq,Ib (66) -62: pcmpistrm Vdq,Wdq,Ib (66) -63: pcmpistri Vdq,Wdq,Ib (66) -df: aeskeygenassist Vdq,Wdq,Ib (66) +04: vpermilps /r,Ib (66),(oVEX) +05: vpermilpd /r,Ib (66),(oVEX) +06: vperm2f128 /r,Ib (66),(oVEX),(o256) +08: roundps Vdq,Wdq,Ib (66),(VEX) +09: roundpd Vdq,Wdq,Ib (66),(VEX) +0a: roundss Vss,Wss,Ib (66),(VEX),(o128) +0b: roundsd Vsd,Wsd,Ib (66),(VEX),(o128) +0c: blendps Vdq,Wdq,Ib (66),(VEX) +0d: blendpd Vdq,Wdq,Ib (66),(VEX) +0e: pblendw Vdq,Wdq,Ib (66),(VEX),(o128) +0f: palignr Pq,Qq,Ib | palignr Vdq,Wdq,Ib (66),(VEX),(o128) +14: pextrb Rd/Mb,Vdq,Ib (66),(VEX),(o128) +15: pextrw Rd/Mw,Vdq,Ib (66),(VEX),(o128) +16: pextrd/pextrq Ed/q,Vdq,Ib (66),(VEX),(o128) +17: extractps Ed,Vdq,Ib (66),(VEX),(o128) +18: vinsertf128 /r,Ib (66),(oVEX),(o256) +19: vextractf128 /r,Ib (66),(oVEX),(o256) +20: pinsrb Vdq,Rd/q/Mb,Ib (66),(VEX),(o128) +21: insertps Vdq,Udq/Md,Ib (66),(VEX),(o128) +22: pinsrd/pinsrq Vdq,Ed/q,Ib (66),(VEX),(o128) +40: dpps Vdq,Wdq,Ib (66),(VEX) +41: dppd Vdq,Wdq,Ib (66),(VEX),(o128) +42: mpsadbw Vdq,Wdq,Ib (66),(VEX),(o128) +44: pclmulq Vdq,Wdq,Ib (66),(VEX),(o128) +4a: vblendvps /r,Ib (66),(oVEX) +4b: vblendvpd /r,Ib (66),(oVEX) +4c: vpblendvb /r,Ib (66),(oVEX),(o128) +60: pcmpestrm Vdq,Wdq,Ib (66),(VEX),(o128) +61: pcmpestri Vdq,Wdq,Ib (66),(VEX),(o128) +62: pcmpistrm Vdq,Wdq,Ib (66),(VEX),(o128) +63: pcmpistri Vdq,Wdq,Ib (66),(VEX),(o128) +df: aeskeygenassist Vdq,Wdq,Ib (66),(VEX),(o128) EndTable GrpTable: Grp1 @@ -785,29 +804,29 @@ GrpTable: Grp11 EndTable GrpTable: Grp12 -2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B) -4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B) -6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B) +2: psrlw Nq,Ib (11B) | psrlw Udq,Ib (66),(11B),(VEX),(o128) +4: psraw Nq,Ib (11B) | psraw Udq,Ib (66),(11B),(VEX),(o128) +6: psllw Nq,Ib (11B) | psllw Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp13 -2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B) -4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B) -6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B) +2: psrld Nq,Ib (11B) | psrld Udq,Ib (66),(11B),(VEX),(o128) +4: psrad Nq,Ib (11B) | psrad Udq,Ib (66),(11B),(VEX),(o128) +6: pslld Nq,Ib (11B) | pslld Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp14 -2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B) -3: psrldq Udq,Ib (66),(11B) -6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B) -7: pslldq Udq,Ib (66),(11B) +2: psrlq Nq,Ib (11B) | psrlq Udq,Ib (66),(11B),(VEX),(o128) +3: psrldq Udq,Ib (66),(11B),(VEX),(o128) +6: psllq Nq,Ib (11B) | psllq Udq,Ib (66),(11B),(VEX),(o128) +7: pslldq Udq,Ib (66),(11B),(VEX),(o128) EndTable GrpTable: Grp15 0: fxsave 1: fxstor -2: ldmxcsr -3: stmxcsr +2: ldmxcsr (VEX) +3: stmxcsr (VEX) 4: XSAVE 5: XRSTOR | lfence (11B) 6: mfence (11B) diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 7d54929..e34e92a 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -13,6 +13,18 @@ function check_awk_implement() { return "" } +# Clear working vars +function clear_vars() { + delete table + delete lptable2 + delete lptable1 + delete lptable3 + eid = -1 # escape id + gid = -1 # group id + aid = -1 # AVX id + tname = "" +} + BEGIN { # Implementation error checking awkchecked = check_awk_implement() @@ -24,11 +36,15 @@ BEGIN { # Setup generating tables print "/* x86 opcode map generated from x86-opcode-map.txt */" - print "/* Do not change this code. */" + print "/* Do not change this code. */\n" ggid = 1 geid = 1 + gaid = 0 + delete etable + delete gtable + delete atable - opnd_expr = "^[[:alpha:]]" + opnd_expr = "^[[:alpha:]/]" ext_expr = "^\\(" sep_expr = "^\\|$" group_expr = "^Grp[[:alnum:]]+" @@ -46,19 +62,19 @@ BEGIN { imm_flag["Ob"] = "INAT_MOFFSET" imm_flag["Ov"] = "INAT_MOFFSET" - modrm_expr = "^([CDEGMNPQRSUVW][[:lower:]]+|NTA|T[012])" + modrm_expr = "^([CDEGMNPQRSUVW/][[:lower:]]+|NTA|T[012])" force64_expr = "\\([df]64\\)" rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO lprefix1_expr = "\\(66\\)" - delete lptable1 - lprefix2_expr = "\\(F2\\)" - delete lptable2 - lprefix3_expr = "\\(F3\\)" - delete lptable3 + lprefix2_expr = "\\(F3\\)" + lprefix3_expr = "\\(F2\\)" max_lprefix = 4 + vexok_expr = "\\(VEX\\)" + vexonly_expr = "\\(oVEX\\)" + prefix_expr = "\\(Prefix\\)" prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" prefix_num["REPNE"] = "INAT_PFX_REPNE" @@ -71,12 +87,10 @@ BEGIN { prefix_num["SEG=GS"] = "INAT_PFX_GS" prefix_num["SEG=SS"] = "INAT_PFX_SS" prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" + prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2" + prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3" - delete table - delete etable - delete gtable - eid = -1 - gid = -1 + clear_vars() } function semantic_error(msg) { @@ -97,14 +111,12 @@ function array_size(arr, i,c) { /^Table:/ { print "/* " $0 " */" + if (tname != "") + semantic_error("Hit Table: before EndTable:."); } /^Referrer:/ { - if (NF == 1) { - # primary opcode table - tname = "inat_primary_table" - eid = -1 - } else { + if (NF != 1) { # escape opcode table ref = "" for (i = 2; i <= NF; i++) @@ -114,6 +126,19 @@ function array_size(arr, i,c) { } } +/^AVXcode:/ { + if (NF != 1) { + # AVX/escape opcode table + aid = $2 + if (gaid <= aid) + gaid = aid + 1 + if (tname == "") # AVX only opcode table + tname = sprintf("inat_avx_table_%d", $2) + } + if (aid == -1 && eid == -1) # primary opcode table + tname = "inat_primary_table" +} + /^GrpTable:/ { print "/* " $0 " */" if (!($2 in group)) @@ -162,30 +187,33 @@ function print_table(tbl,name,fmt,n) print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256) etable[eid,0] = tname + if (aid >= 0) + atable[aid,0] = tname } if (array_size(lptable1) != 0) { print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256) etable[eid,1] = tname "_1" + if (aid >= 0) + atable[aid,1] = tname "_1" } if (array_size(lptable2) != 0) { print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256) etable[eid,2] = tname "_2" + if (aid >= 0) + atable[aid,2] = tname "_2" } if (array_size(lptable3) != 0) { print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", "0x%02x", 256) etable[eid,3] = tname "_3" + if (aid >= 0) + atable[aid,3] = tname "_3" } } print "" - delete table - delete lptable1 - delete lptable2 - delete lptable3 - gid = -1 - eid = -1 + clear_vars() } function add_flags(old,new) { @@ -284,6 +312,14 @@ function convert_operands(opnd, i,imm,mod) if (match(opcode, fpu_expr)) flags = add_flags(flags, "INAT_MODRM") + # check VEX only code + if (match(ext, vexonly_expr)) + flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") + + # check VEX only code + if (match(ext, vexok_expr)) + flags = add_flags(flags, "INAT_VEXOK") + # check prefixes if (match(ext, prefix_expr)) { if (!prefix_num[opcode]) @@ -330,5 +366,15 @@ END { for (j = 0; j < max_lprefix; j++) if (gtable[i,j]) print " ["i"]["j"] = "gtable[i,j]"," + print "};\n" + # print AVX opcode map's array + print "/* AVX opcode map array */" + print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\ + "[INAT_LSTPFX_MAX + 1] = {" + for (i = 0; i < gaid; i++) + for (j = 0; j < max_lprefix; j++) + if (atable[i,j]) + print " ["i"]["j"] = "atable[i,j]"," print "};" } + -- cgit v0.10.2 From 3f7e454af1dd8b9cea410d9380d3f71477e94f2b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:35 -0400 Subject: x86: Add Intel FMA instructions to x86 opcode map Add Intel FMA(FUSED-MULTIPLY-ADD) instructions to x86 opcode map for x86 instruction decoder. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204235.30545.33997.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 9887bfe..a793da5 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -647,11 +647,43 @@ AVXcode: 2 3d: pmaxsd Vdq,Wdq (66),(VEX),(o128) 3e: pmaxuw Vdq,Wdq (66),(VEX),(o128) 3f: pmaxud Vdq,Wdq (66),(VEX),(o128) -# 0x0f 0x38 0x4f-0xff +# 0x0f 0x38 0x40-0x8f 40: pmulld Vdq,Wdq (66),(VEX),(o128) 41: phminposuw Vdq,Wdq (66),(VEX),(o128) 80: INVEPT Gd/q,Mdq (66) 81: INVPID Gd/q,Mdq (66) +# 0x0f 0x38 0x90-0xbf (FMA) +96: vfmaddsub132pd/ps /r (66),(VEX) +97: vfmsubadd132pd/ps /r (66),(VEX) +98: vfmadd132pd/ps /r (66),(VEX) +99: vfmadd132sd/ss /r (66),(VEX),(o128) +9a: vfmsub132pd/ps /r (66),(VEX) +9b: vfmsub132sd/ss /r (66),(VEX),(o128) +9c: vfnmadd132pd/ps /r (66),(VEX) +9d: vfnmadd132sd/ss /r (66),(VEX),(o128) +9e: vfnmsub132pd/ps /r (66),(VEX) +9f: vfnmsub132sd/ss /r (66),(VEX),(o128) +a6: vfmaddsub213pd/ps /r (66),(VEX) +a7: vfmsubadd213pd/ps /r (66),(VEX) +a8: vfmadd213pd/ps /r (66),(VEX) +a9: vfmadd213sd/ss /r (66),(VEX),(o128) +aa: vfmsub213pd/ps /r (66),(VEX) +ab: vfmsub213sd/ss /r (66),(VEX),(o128) +ac: vfnmadd213pd/ps /r (66),(VEX) +ad: vfnmadd213sd/ss /r (66),(VEX),(o128) +ae: vfnmsub213pd/ps /r (66),(VEX) +af: vfnmsub213sd/ss /r (66),(VEX),(o128) +b6: vfmaddsub231pd/ps /r (66),(VEX) +b7: vfmsubadd231pd/ps /r (66),(VEX) +b8: vfmadd231pd/ps /r (66),(VEX) +b9: vfmadd231sd/ss /r (66),(VEX),(o128) +ba: vfmsub231pd/ps /r (66),(VEX) +bb: vfmsub231sd/ss /r (66),(VEX),(o128) +bc: vfnmadd231pd/ps /r (66),(VEX) +bd: vfnmadd231sd/ss /r (66),(VEX),(o128) +be: vfnmsub231pd/ps /r (66),(VEX) +bf: vfnmsub231sd/ss /r (66),(VEX),(o128) +# 0x0f 0x38 0xc0-0xff db: aesimc Vdq,Wdq (66),(VEX),(o128) dc: aesenc Vdq,Wdq (66),(VEX),(o128) dd: aesenclast Vdq,Wdq (66),(VEX),(o128) -- cgit v0.10.2 From dd004c475cd15a5749b04b0283d41ffdfa57d658 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:44 -0400 Subject: kprobe-tracer: Compare both of event-name and event-group to find probe Fix find_probe_event() to compare both of event-name and event-group. Without this fix, kprobe-tracer overwrites existing same event-name probe even if its group-name is different. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204244.30545.27516.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8ef707..a86c3ac 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -353,12 +353,14 @@ static void free_trace_probe(struct trace_probe *tp) kfree(tp); } -static struct trace_probe *find_probe_event(const char *event) +static struct trace_probe *find_probe_event(const char *event, + const char *group) { struct trace_probe *tp; list_for_each_entry(tp, &probe_list, list) - if (!strcmp(tp->call.name, event)) + if (strcmp(tp->call.name, event) == 0 && + strcmp(tp->call.system, group) == 0) return tp; return NULL; } @@ -383,7 +385,7 @@ static int register_trace_probe(struct trace_probe *tp) mutex_lock(&probe_lock); /* register as an event */ - old_tp = find_probe_event(tp->call.name); + old_tp = find_probe_event(tp->call.name, tp->call.system); if (old_tp) { /* delete old event */ unregister_trace_probe(old_tp); -- cgit v0.10.2 From 8030c5f5a57e018fcdeb1f395d7adc123b48ced6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:42:53 -0400 Subject: perf/probes: Exit searching after finding target function Exit searching after finding real (not-inlined) function, because there should be no same symbol in that CU. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204252.30545.19251.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 54e7071..b98d35e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -585,14 +585,14 @@ static int probefunc_callback(struct die_link *dlink, void *data) DIE_IF(ret != DW_DLV_OK); pr_debug("inline definition offset %lld\n", pf->inl_offs); - return 0; + return 0; /* Continue to search */ } /* Get probe address */ pf->addr = die_get_entrypc(dlink->die); pf->addr += pp->offset; /* TODO: Check the address in this function */ show_probepoint(dlink->die, pp->offset, pf); - /* Continue to search */ + return 1; /* Exit; no same symbol in this CU. */ } } else if (tag == DW_TAG_inlined_subroutine && pf->inl_offs) { if (die_get_abstract_origin(dlink->die) == pf->inl_offs) { -- cgit v0.10.2 From 46ab49267d338eb5056d0077e16346509b9e9284 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:43:02 -0400 Subject: perf/probes: Improve command-line option of perf-probe Change command-line option from -P to --add, and accepting probes without --add too. perf probe --add "probe-define" or, just: perf probe "probe-define" Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204301.30545.48600.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index dcb406c..3370dab 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -65,8 +65,8 @@ static struct { #define semantic_error(msg ...) die("Semantic error :" msg) -static int parse_probepoint(const struct option *opt __used, - const char *str, int unset __used) +/* Parse a probe point. Note that any error must die. */ +static void parse_probepoint(const char *str) { char *argv[MAX_PROBE_ARGS + 2]; /* Event + probe + args */ int argc, i; @@ -75,9 +75,6 @@ static int parse_probepoint(const struct option *opt __used, char **event = &session.events[session.nr_probe]; int retp = 0; - if (!str) /* The end of probe points */ - return 0; - pr_debug("probe-definition(%d): %s\n", session.nr_probe, str); if (++session.nr_probe == MAX_PROBES) semantic_error("Too many probes"); @@ -176,6 +173,13 @@ static int parse_probepoint(const struct option *opt __used, } pr_debug("%d arguments\n", pp->nr_args); +} + +static int opt_add_probepoint(const struct option *opt __used, + const char *str, int unset __used) +{ + if (str) + parse_probepoint(str); return 0; } @@ -211,7 +215,8 @@ static int open_default_vmlinux(void) #endif static const char * const probe_usage[] = { - "perf probe [] -P 'PROBEDEF' [-P 'PROBEDEF' ...]", + "perf probe [] 'PROBEDEF' ['PROBEDEF' ...]", + "perf probe [] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", NULL }; @@ -222,7 +227,7 @@ static const struct option options[] = { OPT_STRING('k', "vmlinux", &session.vmlinux, "file", "vmlinux/module pathname"), #endif - OPT_CALLBACK('P', "probe", NULL, + OPT_CALLBACK('a', "add", NULL, #ifdef NO_LIBDWARF "p|r:[GRP/]NAME FUNC[+OFFS] [ARG ...]", #else @@ -243,7 +248,7 @@ static const struct option options[] = { "\t\tARG:\tProbe argument (local variable name or\n" #endif "\t\t\tkprobe-tracer argument format is supported.)\n", - parse_probepoint), + opt_add_probepoint), OPT_END() }; @@ -296,8 +301,11 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) char buf[MAX_CMDLEN]; argc = parse_options(argc, argv, options, probe_usage, - PARSE_OPT_STOP_AT_NON_OPTION); - if (argc || session.nr_probe == 0) + PARSE_OPT_STOP_AT_NON_OPTION); + for (i = 0; i < argc; i++) + parse_probe_event(argv[i]); + + if (session.nr_probe == 0) usage_with_options(probe_usage, options); #ifdef NO_LIBDWARF -- cgit v0.10.2 From 253977b0d87fbb793f12b1661a763ae264028ccf Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:43:10 -0400 Subject: perf/probes: Improve probe point syntax of perf-probe This changes probe point syntax of perf-probe as below [:ABS_LN] [ARGS] or [+OFFS|%return][@SRC] [ARGS] And event name and event group name are automatically generated based on probe-symbol and offset as below. perfprobes/SYMBOL_OFFSET[_NUM] Where SYMBOL is the probing symbol and OFFSET is the byte offset from the symbol. Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204310.30545.84984.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 3370dab..92b4c49 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -52,6 +52,7 @@ const char *default_search_path[NR_SEARCH_PATH] = { #define MAX_PATH_LEN 256 #define MAX_PROBES 128 #define MAX_PROBE_ARGS 128 +#define PERFPROBE_GROUP "perfprobe" /* Session management structure */ static struct { @@ -60,20 +61,100 @@ static struct { int need_dwarf; int nr_probe; struct probe_point probes[MAX_PROBES]; - char *events[MAX_PROBES]; } session; #define semantic_error(msg ...) die("Semantic error :" msg) -/* Parse a probe point. Note that any error must die. */ -static void parse_probepoint(const char *str) +/* Parse probe point. Return 1 if return probe */ +static void parse_probe_point(char *arg, struct probe_point *pp) +{ + char *ptr, *tmp; + char c, nc; + /* + * + * perf probe SRC:LN + * perf probe FUNC[+OFFS|%return][@SRC] + */ + + ptr = strpbrk(arg, ":+@%"); + if (ptr) { + nc = *ptr; + *ptr++ = '\0'; + } + + /* Check arg is function or file and copy it */ + if (strchr(arg, '.')) /* File */ + pp->file = strdup(arg); + else /* Function */ + pp->function = strdup(arg); + DIE_IF(pp->file == NULL && pp->function == NULL); + + /* Parse other options */ + while (ptr) { + arg = ptr; + c = nc; + ptr = strpbrk(arg, ":+@%"); + if (ptr) { + nc = *ptr; + *ptr++ = '\0'; + } + switch (c) { + case ':': /* Line number */ + pp->line = strtoul(arg, &tmp, 0); + if (*tmp != '\0') + semantic_error("There is non-digit charactor" + " in line number."); + break; + case '+': /* Byte offset from a symbol */ + pp->offset = strtoul(arg, &tmp, 0); + if (*tmp != '\0') + semantic_error("There is non-digit charactor" + " in offset."); + break; + case '@': /* File name */ + if (pp->file) + semantic_error("SRC@SRC is not allowed."); + pp->file = strdup(arg); + DIE_IF(pp->file == NULL); + if (ptr) + semantic_error("@SRC must be the last " + "option."); + break; + case '%': /* Probe places */ + if (strcmp(arg, "return") == 0) { + pp->retprobe = 1; + } else /* Others not supported yet */ + semantic_error("%%%s is not supported.", arg); + break; + default: + DIE_IF("Program has a bug."); + break; + } + } + + /* Exclusion check */ + if (pp->line && pp->function) + semantic_error("Function-relative line number is not" + " supported yet."); + if (!pp->line && pp->file && !pp->function) + semantic_error("File always requires line number."); + if (pp->offset && !pp->function) + semantic_error("Offset requires an entry function."); + if (pp->retprobe && !pp->function) + semantic_error("Return probe requires an entry function."); + if (pp->offset && pp->retprobe) + semantic_error("Offset can't be used with return probe."); + + pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n", + pp->function, pp->file, pp->line, pp->offset, pp->retprobe); +} + +/* Parse an event definition. Note that any error must die. */ +static void parse_probe_event(const char *str) { char *argv[MAX_PROBE_ARGS + 2]; /* Event + probe + args */ int argc, i; - char *arg, *ptr; struct probe_point *pp = &session.probes[session.nr_probe]; - char **event = &session.events[session.nr_probe]; - int retp = 0; pr_debug("probe-definition(%d): %s\n", session.nr_probe, str); if (++session.nr_probe == MAX_PROBES) @@ -103,70 +184,28 @@ static void parse_probepoint(const char *str) pr_debug("argv[%d]=%s\n", argc, argv[argc - 1]); } } while (*str != '\0'); - if (argc < 2) - semantic_error("Need event-name and probe-point at least."); - - /* Parse the event name */ - if (argv[0][0] == 'r') - retp = 1; - else if (argv[0][0] != 'p') - semantic_error("You must specify 'p'(kprobe) or" - " 'r'(kretprobe) first."); - /* TODO: check event name */ - *event = argv[0]; + if (!argc) + semantic_error("An empty argument."); /* Parse probe point */ - arg = argv[1]; - if (arg[0] == '@') { - /* Source Line */ - arg++; - ptr = strchr(arg, ':'); - if (!ptr || !isdigit(ptr[1])) - semantic_error("Line number is required."); - *ptr++ = '\0'; - if (strlen(arg) == 0) - semantic_error("No file name."); - pp->file = strdup(arg); - pp->line = atoi(ptr); - if (!pp->file || !pp->line) - semantic_error("Failed to parse line."); - pr_debug("file:%s line:%d\n", pp->file, pp->line); - } else { - /* Function name */ - ptr = strchr(arg, '+'); - if (ptr) { - if (!isdigit(ptr[1])) - semantic_error("Offset is required."); - *ptr++ = '\0'; - pp->offset = atoi(ptr); - } else - ptr = arg; - ptr = strchr(ptr, '@'); - if (ptr) { - *ptr++ = '\0'; - pp->file = strdup(ptr); - } - pp->function = strdup(arg); - pr_debug("symbol:%s file:%s offset:%d\n", - pp->function, pp->file, pp->offset); - } - free(argv[1]); + parse_probe_point(argv[0], pp); + free(argv[0]); if (pp->file) session.need_dwarf = 1; /* Copy arguments */ - pp->nr_args = argc - 2; + pp->nr_args = argc - 1; if (pp->nr_args > 0) { pp->args = (char **)malloc(sizeof(char *) * pp->nr_args); if (!pp->args) die("malloc"); - memcpy(pp->args, &argv[2], sizeof(char *) * pp->nr_args); + memcpy(pp->args, &argv[1], sizeof(char *) * pp->nr_args); } /* Ensure return probe has no C argument */ for (i = 0; i < pp->nr_args; i++) if (is_c_varname(pp->args[i])) { - if (retp) + if (pp->retprobe) semantic_error("You can't specify local" " variable for kretprobe"); session.need_dwarf = 1; @@ -175,11 +214,11 @@ static void parse_probepoint(const char *str) pr_debug("%d arguments\n", pp->nr_args); } -static int opt_add_probepoint(const struct option *opt __used, +static int opt_add_probe_event(const struct option *opt __used, const char *str, int unset __used) { if (str) - parse_probepoint(str); + parse_probe_event(str); return 0; } @@ -229,17 +268,16 @@ static const struct option options[] = { #endif OPT_CALLBACK('a', "add", NULL, #ifdef NO_LIBDWARF - "p|r:[GRP/]NAME FUNC[+OFFS] [ARG ...]", + "FUNC[+OFFS|%return] [ARG ...]", #else - "p|r:[GRP/]NAME FUNC[+OFFS][@SRC]|@SRC:LINE [ARG ...]", + "FUNC[+OFFS|%return][@SRC]|SRC:LINE [ARG ...]", #endif "probe point definition, where\n" - "\t\tp:\tkprobe probe\n" - "\t\tr:\tkretprobe probe\n" "\t\tGRP:\tGroup name (optional)\n" "\t\tNAME:\tEvent name\n" "\t\tFUNC:\tFunction name\n" "\t\tOFFS:\tOffset from function entry (in byte)\n" + "\t\t%return:\tPut the probe at function return\n" #ifdef NO_LIBDWARF "\t\tARG:\tProbe argument (only \n" #else @@ -248,7 +286,7 @@ static const struct option options[] = { "\t\tARG:\tProbe argument (local variable name or\n" #endif "\t\t\tkprobe-tracer argument format is supported.)\n", - opt_add_probepoint), + opt_add_probe_event), OPT_END() }; @@ -266,7 +304,7 @@ static int write_new_event(int fd, const char *buf) #define MAX_CMDLEN 256 -static int synthesize_probepoint(struct probe_point *pp) +static int synthesize_probe_event(struct probe_point *pp) { char *buf; int i, len, ret; @@ -316,12 +354,12 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) /* Synthesize probes without dwarf */ for (j = 0; j < session.nr_probe; j++) { #ifndef NO_LIBDWARF - if (session.events[j][0] != 'r') { + if (!session.probes[j].retprobe) { session.need_dwarf = 1; continue; } #endif - ret = synthesize_probepoint(&session.probes[j]); + ret = synthesize_probe_event(&session.probes[j]); if (ret == -E2BIG) semantic_error("probe point is too long."); else if (ret < 0) @@ -349,7 +387,6 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) ret = find_probepoint(fd, pp); if (ret <= 0) die("No probe point found.\n"); - pr_debug("probe event %s found\n", session.events[j]); } close(fd); @@ -364,13 +401,17 @@ setup_probes: for (j = 0; j < session.nr_probe; j++) { pp = &session.probes[j]; if (pp->found == 1) { - snprintf(buf, MAX_CMDLEN, "%s %s\n", - session.events[j], pp->probes[0]); + snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x %s\n", + pp->retprobe ? 'r' : 'p', PERFPROBE_GROUP, + pp->function, pp->offset, pp->probes[0]); write_new_event(fd, buf); } else for (i = 0; i < pp->found; i++) { - snprintf(buf, MAX_CMDLEN, "%s%d %s\n", - session.events[j], i, pp->probes[i]); + snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x_%d %s\n", + pp->retprobe ? 'r' : 'p', + PERFPROBE_GROUP, + pp->function, pp->offset, i, + pp->probes[0]); write_new_event(fd, buf); } } diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b98d35e..6d3bac9 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -483,10 +483,20 @@ static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs, if (ret == DW_DLV_OK) { ret = snprintf(tmp, MAX_PROBE_BUFFER, "%s+%u", name, (unsigned int)offs); + /* Copy the function name if possible */ + if (!pp->function) { + pp->function = strdup(name); + pp->offset = offs; + } dwarf_dealloc(__dw_debug, name, DW_DLA_STRING); } else { /* This function has no name. */ ret = snprintf(tmp, MAX_PROBE_BUFFER, "0x%llx", pf->addr); + if (!pp->function) { + /* TODO: Use _stext */ + pp->function = strdup(""); + pp->offset = (int)pf->addr; + } } DIE_IF(ret < 0); DIE_IF(ret >= MAX_PROBE_BUFFER); diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index d17fafc..240d6cb 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -22,6 +22,8 @@ struct probe_point { int nr_args; /* Number of arguments */ char **args; /* Arguments */ + int retprobe; /* Return probe */ + /* Output */ int found; /* Number of found probe points */ char *probes[MAX_PROBES]; /* Output buffers (will be allocated)*/ -- cgit v0.10.2 From b0ef07324310d66f660a311d4a8d669eda74f801 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 27 Oct 2009 16:43:19 -0400 Subject: perf/probes: Support function entry relative line number Add function-entry relative line number specifying support to perf-probe. This allows users to define probes by line number from entry of the function. e.g. perf probe schedule:16 Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091027204319.30545.30678.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 92b4c49..a99a366 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -133,17 +133,16 @@ static void parse_probe_point(char *arg, struct probe_point *pp) } /* Exclusion check */ - if (pp->line && pp->function) - semantic_error("Function-relative line number is not" - " supported yet."); + if (pp->line && pp->offset) + semantic_error("Offset can't be used with line number."); if (!pp->line && pp->file && !pp->function) semantic_error("File always requires line number."); if (pp->offset && !pp->function) semantic_error("Offset requires an entry function."); if (pp->retprobe && !pp->function) semantic_error("Return probe requires an entry function."); - if (pp->offset && pp->retprobe) - semantic_error("Offset can't be used with return probe."); + if ((pp->offset || pp->line) && pp->retprobe) + semantic_error("Offset/Line can't be used with return probe."); pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n", pp->function, pp->file, pp->line, pp->offset, pp->retprobe); @@ -270,7 +269,7 @@ static const struct option options[] = { #ifdef NO_LIBDWARF "FUNC[+OFFS|%return] [ARG ...]", #else - "FUNC[+OFFS|%return][@SRC]|SRC:LINE [ARG ...]", + "FUNC[+OFFS|%return|:RLN][@SRC]|SRC:ALN [ARG ...]", #endif "probe point definition, where\n" "\t\tGRP:\tGroup name (optional)\n" @@ -282,7 +281,8 @@ static const struct option options[] = { "\t\tARG:\tProbe argument (only \n" #else "\t\tSRC:\tSource code path\n" - "\t\tLINE:\tLine number\n" + "\t\tRLN:\tRelative line number from function entry.\n" + "\t\tALN:\tAbsolute line number in file.\n" "\t\tARG:\tProbe argument (local variable name or\n" #endif "\t\t\tkprobe-tracer argument format is supported.)\n", diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 6d3bac9..db96186 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -114,7 +114,7 @@ static int strtailcmp(const char *s1, const char *s2) } /* Find the fileno of the target file. */ -static Dwarf_Unsigned die_get_fileno(Dwarf_Die cu_die, const char *fname) +static Dwarf_Unsigned cu_find_fileno(Dwarf_Die cu_die, const char *fname) { Dwarf_Signed cnt, i; Dwarf_Unsigned found = 0; @@ -335,6 +335,36 @@ static int attr_get_locdesc(Dwarf_Attribute attr, Dwarf_Locdesc *desc, return ret; } +/* Get decl_file attribute value (file number) */ +static Dwarf_Unsigned die_get_decl_file(Dwarf_Die sp_die) +{ + Dwarf_Attribute attr; + Dwarf_Unsigned fno; + int ret; + + ret = dwarf_attr(sp_die, DW_AT_decl_file, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_formudata(attr, &fno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return fno; +} + +/* Get decl_line attribute value (line number) */ +static Dwarf_Unsigned die_get_decl_line(Dwarf_Die sp_die) +{ + Dwarf_Attribute attr; + Dwarf_Unsigned lno; + int ret; + + ret = dwarf_attr(sp_die, DW_AT_decl_line, &attr, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_formudata(attr, &lno, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + dwarf_dealloc(__dw_debug, attr, DW_DLA_ATTR); + return lno; +} + /* * Probe finder related functions */ @@ -501,6 +531,7 @@ static void show_probepoint(Dwarf_Die sp_die, Dwarf_Signed offs, DIE_IF(ret < 0); DIE_IF(ret >= MAX_PROBE_BUFFER); len = ret; + pr_debug("Probe point found: %s\n", tmp); /* Find each argument */ get_current_frame_base(sp_die, pf); @@ -536,17 +567,16 @@ static int probeaddr_callback(struct die_link *dlink, void *data) } /* Find probe point from its line number */ -static void find_by_line(Dwarf_Die cu_die, struct probe_finder *pf) +static void find_by_line(struct probe_finder *pf) { - struct probe_point *pp = pf->pp; - Dwarf_Signed cnt, i; + Dwarf_Signed cnt, i, clm; Dwarf_Line *lines; Dwarf_Unsigned lineno = 0; Dwarf_Addr addr; Dwarf_Unsigned fno; int ret; - ret = dwarf_srclines(cu_die, &lines, &cnt, &__dw_error); + ret = dwarf_srclines(pf->cu_die, &lines, &cnt, &__dw_error); DIE_IF(ret != DW_DLV_OK); for (i = 0; i < cnt; i++) { @@ -557,15 +587,20 @@ static void find_by_line(Dwarf_Die cu_die, struct probe_finder *pf) ret = dwarf_lineno(lines[i], &lineno, &__dw_error); DIE_IF(ret != DW_DLV_OK); - if (lineno != (Dwarf_Unsigned)pp->line) + if (lineno != pf->lno) continue; + ret = dwarf_lineoff(lines[i], &clm, &__dw_error); + DIE_IF(ret != DW_DLV_OK); + ret = dwarf_lineaddr(lines[i], &addr, &__dw_error); DIE_IF(ret != DW_DLV_OK); - pr_debug("Probe point found: 0x%llx\n", addr); + pr_debug("Probe line found: line[%d]:%u,%d addr:0x%llx\n", + (int)i, (unsigned)lineno, (int)clm, addr); pf->addr = addr; /* Search a real subprogram including this line, */ - ret = search_die_from_children(cu_die, probeaddr_callback, pf); + ret = search_die_from_children(pf->cu_die, + probeaddr_callback, pf); if (ret == 0) die("Probe point is not found in subprograms.\n"); /* Continuing, because target line might be inlined. */ @@ -587,6 +622,13 @@ static int probefunc_callback(struct die_link *dlink, void *data) DIE_IF(ret == DW_DLV_ERROR); if (tag == DW_TAG_subprogram) { if (die_compare_name(dlink->die, pp->function) == 0) { + if (pp->line) { /* Function relative line */ + pf->fno = die_get_decl_file(dlink->die); + pf->lno = die_get_decl_line(dlink->die) + + pp->line; + find_by_line(pf); + return 1; + } if (die_inlined_subprogram(dlink->die)) { /* Inlined function, save it. */ ret = dwarf_die_CU_offset(dlink->die, @@ -631,9 +673,9 @@ found: return 0; } -static void find_by_func(Dwarf_Die cu_die, struct probe_finder *pf) +static void find_by_func(struct probe_finder *pf) { - search_die_from_children(cu_die, probefunc_callback, pf); + search_die_from_children(pf->cu_die, probefunc_callback, pf); } /* Find a probe point */ @@ -641,7 +683,6 @@ int find_probepoint(int fd, struct probe_point *pp) { Dwarf_Half addr_size = 0; Dwarf_Unsigned next_cuh = 0; - Dwarf_Die cu_die = 0; int cu_number = 0, ret; struct probe_finder pf = {.pp = pp}; @@ -659,25 +700,27 @@ int find_probepoint(int fd, struct probe_point *pp) break; /* Get the DIE(Debugging Information Entry) of this CU */ - ret = dwarf_siblingof(__dw_debug, 0, &cu_die, &__dw_error); + ret = dwarf_siblingof(__dw_debug, 0, &pf.cu_die, &__dw_error); DIE_IF(ret != DW_DLV_OK); /* Check if target file is included. */ if (pp->file) - pf.fno = die_get_fileno(cu_die, pp->file); + pf.fno = cu_find_fileno(pf.cu_die, pp->file); if (!pp->file || pf.fno) { /* Save CU base address (for frame_base) */ - ret = dwarf_lowpc(cu_die, &pf.cu_base, &__dw_error); + ret = dwarf_lowpc(pf.cu_die, &pf.cu_base, &__dw_error); DIE_IF(ret == DW_DLV_ERROR); if (ret == DW_DLV_NO_ENTRY) pf.cu_base = 0; - if (pp->line) - find_by_line(cu_die, &pf); if (pp->function) - find_by_func(cu_die, &pf); + find_by_func(&pf); + else { + pf.lno = pp->line; + find_by_line(&pf); + } } - dwarf_dealloc(__dw_debug, cu_die, DW_DLA_DIE); + dwarf_dealloc(__dw_debug, pf.cu_die, DW_DLA_DIE); } ret = dwarf_finish(__dw_debug, &__dw_error); DIE_IF(ret != DW_DLV_OK); diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 240d6cb..bdebca6 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -41,7 +41,9 @@ struct probe_finder { /* For function searching */ Dwarf_Addr addr; /* Address */ Dwarf_Unsigned fno; /* File number */ + Dwarf_Unsigned lno; /* Line number */ Dwarf_Off inl_offs; /* Inline offset */ + Dwarf_Die cu_die; /* Current CU */ /* For variable searching */ Dwarf_Addr cu_base; /* Current CU base address */ -- cgit v0.10.2 From 3ed67776fc23061180896086a206a02be649dd26 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 28 Oct 2009 17:37:01 +0800 Subject: tracing/filters: Fix to make system filter work commit fce29d15b59245597f7f320db4a9f2be0f5fb512 ("tracing/filters: Refactor subsystem filter code") broke system filter accidentally. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi LKML-Reference: <4AE810BD.3070009@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 21d3475..50504cb 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1230,12 +1230,12 @@ static int replace_system_preds(struct event_subsystem *system, struct filter_parse_state *ps, char *filter_string) { - struct event_filter *filter = system->filter; struct ftrace_event_call *call; bool fail = true; int err; list_for_each_entry(call, &ftrace_events, list) { + struct event_filter *filter = call->filter; if (!call->define_fields) continue; -- cgit v0.10.2 From 4f65ae36f0291ef97b7d4de2f59b2e68f3c8420b Mon Sep 17 00:00:00 2001 From: Pavel Vasilyev Date: Thu, 29 Oct 2009 17:16:00 +0100 Subject: agp/amd64: Remove GART dependency on AGP_AMD64 The GART IOMMU code has no strong dependency to the AMD64 AGP code. So the automatic selection of AGP_AMD64 for GART can be removed. Cc: Dave Jones Signed-off-by: Pavel Vasilyev Signed-off-by: Joerg Roedel diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig index ccb1fa8..2fb3a48 100644 --- a/drivers/char/agp/Kconfig +++ b/drivers/char/agp/Kconfig @@ -56,9 +56,8 @@ config AGP_AMD X on AMD Irongate, 761, and 762 chipsets. config AGP_AMD64 - tristate "AMD Opteron/Athlon64 on-CPU GART support" if !GART_IOMMU + tristate "AMD Opteron/Athlon64 on-CPU GART support" depends on AGP && X86 - default y if GART_IOMMU help This option gives you AGP support for the GLX component of X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. -- cgit v0.10.2 From d49f6aa76d24c60a52530474cb662e8ad9f09471 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 28 Oct 2009 13:01:38 +0800 Subject: tracing: Amend documentation in recordmcount.pl to reflect implementation The documentation currently says we will use the first function in a section as a reference. The actual algorithm is: choose the first global function we meet as a reference. If there is none, choose the first local one. Change the documentation to be consistent with the code. Also add several other clarifications. Signed-off-by: Li Hong LKML-Reference: <20091028050138.GA30758@uhli> Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index bfb8b2c..a569be7 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -6,73 +6,89 @@ # all the offsets to the calls to mcount. # # -# What we want to end up with is a section in vmlinux called -# __mcount_loc that contains a list of pointers to all the -# call sites in the kernel that call mcount. Later on boot up, the kernel -# will read this list, save the locations and turn them into nops. -# When tracing or profiling is later enabled, these locations will then -# be converted back to pointers to some function. +# What we want to end up with this is that each object file will have a +# section called __mcount_loc that will hold the list of pointers to mcount +# callers. After final linking, the vmlinux will have within .init.data the +# list of all callers to mcount between __start_mcount_loc and __stop_mcount_loc. +# Later on boot up, the kernel will read this list, save the locations and turn +# them into nops. When tracing or profiling is later enabled, these locations +# will then be converted back to pointers to some function. # # This is no easy feat. This script is called just after the original # object is compiled and before it is linked. # -# The references to the call sites are offsets from the section of text -# that the call site is in. Hence, all functions in a section that -# has a call site to mcount, will have the offset from the beginning of -# the section and not the beginning of the function. +# When parse this object file using 'objdump', the references to the call +# sites are offsets from the section that the call site is in. Hence, all +# functions in a section that has a call site to mcount, will have the +# offset from the beginning of the section and not the beginning of the +# function. +# +# But where this section will reside finally in vmlinx is undetermined at +# this point. So we can't use this kind of offsets to record the final +# address of this call site. +# +# The trick is to change the call offset referring the start of a section to +# referring a function symbol in this section. During the link step, 'ld' will +# compute the final address according to the information we record. # -# The trick is to find a way to record the beginning of the section. -# The way we do this is to look at the first function in the section -# which will also be the location of that section after final link. # e.g. # # .section ".sched.text", "ax" -# .globl my_func -# my_func: # [...] -# call mcount (offset: 0x5) +# func1: +# [...] +# call mcount (offset: 0x10) +# [...] +# ret +# .globl fun2 +# func2: (offset: 0x20) +# [...] # [...] # ret -# other_func: +# func3: # [...] -# call mcount (offset: 0x1b) +# call mcount (offset: 0x30) # [...] # # Both relocation offsets for the mcounts in the above example will be -# offset from .sched.text. If we make another file called tmp.s with: +# offset from .sched.text. If we choose global symbol func2 as a reference and +# make another file called tmp.s with the new offsets: # # .section __mcount_loc -# .quad my_func + 0x5 -# .quad my_func + 0x1b +# .quad func2 - 0x10 +# .quad func2 + 0x10 # -# We can then compile this tmp.s into tmp.o, and link it to the original +# We can then compile this tmp.s into tmp.o, and link it back to the original # object. # -# But this gets hard if my_func is not globl (a static function). -# In such a case we have: +# In our algorithm, we will choose the first global function we meet in this +# section as the reference. But this gets hard if there is no global functions +# in this section. In such a case we have to select a local one. E.g. func1: # # .section ".sched.text", "ax" -# my_func: +# func1: # [...] -# call mcount (offset: 0x5) +# call mcount (offset: 0x10) # [...] # ret -# other_func: +# func2: # [...] -# call mcount (offset: 0x1b) +# call mcount (offset: 0x20) # [...] +# .section "other.section" # # If we make the tmp.s the same as above, when we link together with -# the original object, we will end up with two symbols for my_func: +# the original object, we will end up with two symbols for func1: # one local, one global. After final compile, we will end up with -# an undefined reference to my_func. +# an undefined reference to func1 or a wrong reference to another global +# func1 in other files. # # Since local objects can reference local variables, we need to find # a way to make tmp.o reference the local objects of the original object -# file after it is linked together. To do this, we convert the my_func +# file after it is linked together. To do this, we convert func1 # into a global symbol before linking tmp.o. Then after we link tmp.o -# we will only have a single symbol for my_func that is global. -# We can convert my_func back into a local symbol and we are done. +# we will only have a single symbol for func1 that is global. +# We can convert func1 back into a local symbol and we are done. # # Here are the steps we take: # @@ -86,10 +102,8 @@ # 6) Link together this new object with the list object. # 7) Convert the local functions back to local symbols and rename # the result as the original object. -# End. # 8) Link the object with the list object. # 9) Move the result back to the original object. -# End. # use strict; -- cgit v0.10.2 From e2d753fac5b3954a3b6001f98479f0435fe7c868 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Tue, 27 Oct 2009 14:57:33 +0800 Subject: tracing: Correct the check for number of arguments in recordmcount.pl The number of arguments passed into recordmcount.pl is 10, but the code checks if only 7 are passed in. Signed-off-by: Li Hong LKML-Reference: <20091027065733.GB22032@uhli> Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index a569be7..a512af1 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -113,7 +113,7 @@ $P =~ s@.*/@@g; my $V = '0.1'; -if ($#ARGV < 7) { +if ($#ARGV != 10) { print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n"; print "version: $V\n"; exit(1); -- cgit v0.10.2 From bdd3b052c63b2c19a0118937f500985c01a19956 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 28 Oct 2009 13:03:32 +0800 Subject: tracing: Check absolute path of input file in recordmcount.pl The ftrace.c file may reference the mcount function and this may interfere with the recordmcount.pl processing. To avoid this, the code does not process the kernel/trace/ftrace.o. But currently the check is against a relative path. This patch modifies the check to succeed if the path is an absolute path. Signed-off-by: Li Hong LKML-Reference: <20091028050332.GC30758@uhli> Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index a512af1..b80e5d0 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -123,7 +123,7 @@ my ($arch, $bits, $objdump, $objcopy, $cc, $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV; # This file refers to mcount and shouldn't be ftraced, so lets' ignore it -if ($inputfile eq "kernel/trace/ftrace.o") { +if ($inputfile =~ m,kernel/trace/ftrace\.o$,) { exit(0); } -- cgit v0.10.2 From 7b7edc27683e20624f4daf17c76041719184201c Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 28 Oct 2009 13:04:21 +0800 Subject: tracing: Fix objcopy revision check in recordmcount.pl The current logic to check objcopy's version is incorrect. This patch fixes the algorithm and disables the use of local functions as a reference if the objcopy version does not support static to global conversions. Also remove some usused variables. Signed-off-by: Li Hong LKML-Reference: <20091028050421.GD30758@uhli> Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index b80e5d0..d6199fc 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -159,6 +159,31 @@ my $function_regex; # Find the name of a function my $mcount_regex; # Find the call site to mcount (return offset) my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command +my $can_use_local = 0; # If we can use local function references + +## +# check_objcopy - whether objcopy supports --globalize-symbols +# +# --globalize-symbols came out in 2.17, we must test the version +# of objcopy, and if it is less than 2.17, then we can not +# record local functions. +sub check_objcopy +{ + open (IN, "$objcopy --version |") or die "error running $objcopy"; + while () { + if (/objcopy.*\s(\d+)\.(\d+)/) { + $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17)); + last; + } + } + close (IN); + + if (!$can_use_local) { + print STDERR "WARNING: could not find objcopy version or version " . + "is less than 2.17.\n" . + "\tLocal function references is disabled.\n"; + } +} if ($arch eq "x86") { if ($bits == 64) { @@ -293,34 +318,7 @@ if ($filename =~ m,^(.*)(\.\S),) { my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s"; my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o"; -# -# --globalize-symbols came out in 2.17, we must test the version -# of objcopy, and if it is less than 2.17, then we can not -# record local functions. -my $use_locals = 01; -my $local_warn_once = 0; -my $found_version = 0; - -open (IN, "$objcopy --version |") || die "error running $objcopy"; -while () { - if (/objcopy.*\s(\d+)\.(\d+)/) { - my $major = $1; - my $minor = $2; - - $found_version = 1; - if ($major < 2 || - ($major == 2 && $minor < 17)) { - $use_locals = 0; - } - last; - } -} -close (IN); - -if (!$found_version) { - print STDERR "WARNING: could not find objcopy version.\n" . - "\tDisabling local function references.\n"; -} +check_objcopy(); # # Step 1: find all the local (static functions) and weak symbols. @@ -367,7 +365,7 @@ sub update_funcs if (defined $locals{$ref_func}) { # only use locals if objcopy supports globalize-symbols - if (!$use_locals) { + if (!$can_use_local) { return; } $convert{$ref_func} = 1; -- cgit v0.10.2 From db24c7dcf42f78629d89b34e5d5a98ed56ea2ff5 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 28 Oct 2009 13:05:23 +0800 Subject: tracing: Move mcount section search to front of loop in recordmcount.pl Move the mcount section check to the beginning of the objdump read loop. This makes the code easier to follow since the search for the mcount section is performed first before the mcount callers are processed. Signed-off-by: Li Hong LKML-Reference: <20091028050523.GE30758@uhli> Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index d6199fc..02c8055 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -391,9 +391,27 @@ open(IN, "$objdump -hdr $inputfile|") || die "error running $objdump"; my $text; + +# read headers first my $read_headers = 1; while () { + + if ($read_headers && /$mcount_section/) { + # + # Somehow the make process can execute this script on an + # object twice. If it does, we would duplicate the mcount + # section and it will cause the function tracer self test + # to fail. Check if the mcount section exists, and if it does, + # warn and exit. + # + print STDERR "ERROR: $mcount_section already in $inputfile\n" . + "\tThis may be an indication that your build is corrupted.\n" . + "\tDelete $inputfile and try again. If the same object file\n" . + "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n"; + exit(-1); + } + # is it a section? if (/$section_regex/) { $read_headers = 0; @@ -434,21 +452,7 @@ while () { $offset = hex $1; } } - } elsif ($read_headers && /$mcount_section/) { - # - # Somehow the make process can execute this script on an - # object twice. If it does, we would duplicate the mcount - # section and it will cause the function tracer self test - # to fail. Check if the mcount section exists, and if it does, - # warn and exit. - # - print STDERR "ERROR: $mcount_section already in $inputfile\n" . - "\tThis may be an indication that your build is corrupted.\n" . - "\tDelete $inputfile and try again. If the same object file\n" . - "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n"; - exit(-1); } - # is this a call site to mcount? If so, record it to print later if ($text_found && /$mcount_regex/) { $offsets[$#offsets + 1] = hex $1; -- cgit v0.10.2 From 306dcf47d28aaf9aedfafb17a602768584cfc0f2 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 28 Oct 2009 13:06:19 +0800 Subject: tracing: Add regex for weak functions in recordmcount.pl Add a variable to contain the regex needed to find weak functions in the 'nm' output. This will allow other archs to easily override it. Also rename the regex variable $nm_regex to $local_regex to be more descriptive. Signed-off-by: Li Hong LKML-Reference: <20091028050619.GF30758@uhli> Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 02c8055..7265a7d 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -92,7 +92,7 @@ # # Here are the steps we take: # -# 1) Record all the local symbols by using 'nm' +# 1) Record all the local and weak symbols by using 'nm' # 2) Use objdump to find all the call site offsets and sections for # mcount. # 3) Compile the list into its own object. @@ -152,7 +152,8 @@ my %weak; # List of weak functions my %convert; # List of local functions used that needs conversion my $type; -my $nm_regex; # Find the local functions (return function) +my $local_regex; # Match a local function (return function) +my $weak_regex; # Match a weak function (return function) my $section_regex; # Find the start of a section my $function_regex; # Find the name of a function # (return offset and func name) @@ -197,7 +198,8 @@ if ($arch eq "x86") { # We base the defaults off of i386, the other archs may # feel free to change them in the below if statements. # -$nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)"; +$local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)"; +$weak_regex = "^[0-9a-fA-F]+\\s+([wW])\\s+(\\S+)"; $section_regex = "Disassembly of section\\s+(\\S+):"; $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$"; @@ -246,7 +248,7 @@ if ($arch eq "x86_64") { $cc .= " -m32"; } elsif ($arch eq "powerpc") { - $nm_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; + $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\.?\\S+)"; $function_regex = "^([0-9a-fA-F]+)\\s+<(\\.?.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s\\.?_mcount\$"; @@ -322,13 +324,13 @@ check_objcopy(); # # Step 1: find all the local (static functions) and weak symbols. -# 't' is local, 'w/W' is weak (we never use a weak function) +# 't' is local, 'w/W' is weak # open (IN, "$nm $inputfile|") || die "error running $nm"; while () { - if (/$nm_regex/) { + if (/$local_regex/) { $locals{$1} = 1; - } elsif (/^[0-9a-fA-F]+\s+([wW])\s+(\S+)/) { + } elsif (/$weak_regex/) { $weak{$2} = $1; } } -- cgit v0.10.2 From 6092858c60f168c1950f8ad73880d54271696ec5 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 28 Oct 2009 13:07:03 +0800 Subject: tracing: Move conditional into update_funcs() in recordmcount.pl Move all the condition validations into the function update_funcs(). Also update_funcs should not die if $ref_func is undefined for there may be more than one valid section in an object file. Signed-off-by: Li Hong LKML-Reference: <20091028050703.GG30758@uhli> Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 7265a7d..884776a 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -348,9 +348,7 @@ my $offset = 0; # offset of ref_func to section beginning # sub update_funcs { - return if ($#offsets < 0); - - defined($ref_func) || die "No function to reference"; + return unless ($ref_func and @offsets); # A section only had a weak function, to represent it. # Unfortunately, a weak function may be overwritten by another @@ -425,7 +423,7 @@ while () { $read_function = 0; } # print out any recorded offsets - update_funcs() if (defined($ref_func)); + update_funcs(); # reset all markers and arrays $text_found = 0; @@ -462,7 +460,7 @@ while () { } # dump out anymore offsets that may have been found -update_funcs() if (defined($ref_func)); +update_funcs(); # If we did not find any mcount callers, we are done (do nothing). if (!$opened) { -- cgit v0.10.2 From c4b8ac2c1aee1398b9378b8730bac56294b3410b Mon Sep 17 00:00:00 2001 From: Li Hong Date: Wed, 28 Oct 2009 13:07:43 +0800 Subject: tracing: Exit with error if a weak function is used in recordmcount.pl If a weak function is used as a relocation reference for mcount callers and that function is overridden, it will cause ftrace to fail at run time. The current code should prevent a weak function from being used, but if one is, the code should exit with an error to fail at compile time. Signed-off-by: Li Hong LKML-Reference: <20091028050743.GH30758@uhli> Signed-off-by: Steven Rostedt diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 884776a..a4e2435 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -350,15 +350,11 @@ sub update_funcs { return unless ($ref_func and @offsets); - # A section only had a weak function, to represent it. - # Unfortunately, a weak function may be overwritten by another - # function of the same name, making all these offsets incorrect. - # To be safe, we simply print a warning and bail. + # Sanity check on weak function. A weak function may be overwritten by + # another function of the same name, making all these offsets incorrect. if (defined $weak{$ref_func}) { - print STDERR - "$inputfile: WARNING: referencing weak function" . + die "$inputfile: ERROR: referencing weak function" . " $ref_func for mcount\n"; - return; } # is this function static? If so, note this fact. -- cgit v0.10.2 From 5975c725dfd6f7d36f493ab1453fbdbd35c1f0e3 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Thu, 29 Oct 2009 11:40:17 -0500 Subject: define convenient securebits masks for prctl users (v2) Hi James, would you mind taking the following into security-testing? The securebits are used by passing them to prctl with the PR_{S,G}ET_SECUREBITS commands. But the defines must be shifted to be used in prctl, which begs to be confused and misused by userspace. So define some more convenient values for userspace to specify. This way userspace does prctl(PR_SET_SECUREBITS, SECBIT_NOROOT); instead of prctl(PR_SET_SECUREBITS, 1 << SECURE_NOROOT); (Thanks to Michael for the idea) This patch also adds include/linux/securebits to the installed headers. Then perhaps it can be included by glibc's sys/prctl.h. Changelog: Oct 29: Stephen Rothwell points out that issecure can be under __KERNEL__. Oct 14: (Suggestions by Michael Kerrisk): 1. spell out SETUID in SECBIT_NO_SETUID* 2. SECBIT_X_LOCKED does not imply SECBIT_X 3. add definitions for keepcaps Oct 14: As suggested by Michael Kerrisk, don't use SB_* as that convention is already in use. Use SECBIT_ prefix instead. Signed-off-by: Serge E. Hallyn Acked-by: Andrew G. Morgan Acked-by: Michael Kerrisk Cc: Ulrich Drepper Cc: James Morris Signed-off-by: James Morris diff --git a/include/linux/Kbuild b/include/linux/Kbuild index cff4a10..ffcdb9b 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -329,6 +329,7 @@ unifdef-y += scc.h unifdef-y += sched.h unifdef-y += screen_info.h unifdef-y += sdla.h +unifdef-y += securebits.h unifdef-y += selinux_netlink.h unifdef-y += sem.h unifdef-y += serial_core.h diff --git a/include/linux/securebits.h b/include/linux/securebits.h index d2c5ed8..3340617 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -1,6 +1,15 @@ #ifndef _LINUX_SECUREBITS_H #define _LINUX_SECUREBITS_H 1 +/* Each securesetting is implemented using two bits. One bit specifies + whether the setting is on or off. The other bit specify whether the + setting is locked or not. A setting which is locked cannot be + changed from user-level. */ +#define issecure_mask(X) (1 << (X)) +#ifdef __KERNEL__ +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#endif + #define SECUREBITS_DEFAULT 0x00000000 /* When set UID 0 has no special privileges. When unset, we support @@ -12,6 +21,9 @@ #define SECURE_NOROOT 0 #define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ +#define SECBIT_NOROOT (issecure_mask(SECURE_NOROOT)) +#define SECBIT_NOROOT_LOCKED (issecure_mask(SECURE_NOROOT_LOCKED)) + /* When set, setuid to/from uid 0 does not trigger capability-"fixup". When unset, to provide compatiblility with old programs relying on set*uid to gain/lose privilege, transitions to/from uid 0 cause @@ -19,6 +31,10 @@ #define SECURE_NO_SETUID_FIXUP 2 #define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ +#define SECBIT_NO_SETUID_FIXUP (issecure_mask(SECURE_NO_SETUID_FIXUP)) +#define SECBIT_NO_SETUID_FIXUP_LOCKED \ + (issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)) + /* When set, a process can retain its capabilities even after transitioning to a non-root user (the set-uid fixup suppressed by bit 2). Bit-4 is cleared when a process calls exec(); setting both @@ -27,12 +43,8 @@ #define SECURE_KEEP_CAPS 4 #define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ -/* Each securesetting is implemented using two bits. One bit specifies - whether the setting is on or off. The other bit specify whether the - setting is locked or not. A setting which is locked cannot be - changed from user-level. */ -#define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) +#define SECBIT_KEEP_CAPS (issecure_mask(SECURE_KEEP_CAPS)) +#define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ -- cgit v0.10.2 From 3507d612366a4e81226295f646410130a1f62a5c Mon Sep 17 00:00:00 2001 From: Rajiv Andrade Date: Thu, 10 Sep 2009 17:09:35 -0300 Subject: tpm_tis: TPM_STS_DATA_EXPECT workaround Some newer Lenovo models are shipped with a TPM that doesn't seem to set the TPM_STS_DATA_EXPECT status bit when sending it a burst of data, so the code understands it as a failure and doesn't proceed sending the chip the intended data. In this patch we bypass this bit check in case the itpm module parameter was set. This patch is based on Andy Isaacson's one: http://marc.info/?l=linux-kernel&m=124650185023495&w=2 It was heavily discussed how should we deal with identifying the chip in kernel space, but the required patch to do so was NACK'd: http://marc.info/?l=linux-kernel&m=124650186423711&w=2 This way we let the user choose using this workaround or not based on his observations on this code behavior when trying to use the TPM. Fixed a checkpatch issue present on the previous patch, thanks to Daniel Walker. Signed-off-by: Rajiv Andrade Acked-by: Eric Paris Tested-by: Seiji Munetoh Signed-off-by: James Morris diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 0b73e4e..27e8de4 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -257,6 +257,10 @@ out: return size; } +static int itpm; +module_param(itpm, bool, 0444); +MODULE_PARM_DESC(itpm, "Force iTPM workarounds (found on some Lenovo laptops)"); + /* * If interrupts are used (signaled by an irq set in the vendor structure) * tpm.c can skip polling for the data to be available as the interrupt is @@ -293,7 +297,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &chip->vendor.int_queue); status = tpm_tis_status(chip); - if ((status & TPM_STS_DATA_EXPECT) == 0) { + if (!itpm && (status & TPM_STS_DATA_EXPECT) == 0) { rc = -EIO; goto out_err; } @@ -467,6 +471,10 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, "1.2 TPM (device-id 0x%X, rev-id %d)\n", vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); + if (itpm) + dev_info(dev, "Intel iTPM workaround enabled\n"); + + /* Figure out the capabilities */ intfcaps = ioread32(chip->vendor.iobase + -- cgit v0.10.2 From 7b2a35132ad0a70902dcd2844c27ed64cda0ce9b Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 2 Nov 2009 08:50:52 +0800 Subject: compiler: Introduce __always_unused I wrote some code which is used as compile-time checker, and the code should be elided after compile. So I need to annotate the code as "always unused", compared to "maybe unused". Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Linus Torvalds LKML-Reference: <4AEE2CEC.8040206@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index a3ed7cb..73dcf80 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -79,6 +79,7 @@ #define noinline __attribute__((noinline)) #define __attribute_const__ __attribute__((__const__)) #define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) #define __gcc_header(x) #x #define _gcc_header(x) __gcc_header(linux/compiler-gcc##x.h) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb513..7947f4f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -213,6 +213,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __maybe_unused /* unimplemented */ #endif +#ifndef __always_unused +# define __always_unused /* unimplemented */ +#endif + #ifndef noinline #define noinline #endif -- cgit v0.10.2 From 5e9b397292ca0b9409dced33e3a22ec993377064 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 2 Nov 2009 08:51:13 +0800 Subject: tracing: Fix to use __always_unused attribute ____ftrace_check_##name() is used for compile-time check on F_printk() only, so it should be marked as __unused instead of __used. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Linus Torvalds LKML-Reference: <4AEE2D01.4010305@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9753fcc..c74848d 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -48,11 +48,11 @@ struct ____ftrace_##name { \ tstruct \ }; \ -static void __used ____ftrace_check_##name(void) \ +static void __always_unused ____ftrace_check_##name(void) \ { \ struct ____ftrace_##name *__entry = NULL; \ \ - /* force cmpile-time check on F_printk() */ \ + /* force compile-time check on F_printk() */ \ printk(print); \ } -- cgit v0.10.2 From 7a7732bc0f7c46f217dbec723f25366b6285cc42 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:31 -0800 Subject: x86: Unify fixup_irqs() for 32-bit and 64-bit kernels There is no reason to have different fixup_irqs() for 32-bit and 64-bit kernels. Unify by using the superior 64-bit version for both the kernels. Signed-off-by: Suresh Siddha Signed-off-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.562512739@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3912061..3ea6655 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -276,3 +276,62 @@ void smp_generic_interrupt(struct pt_regs *regs) } EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); + +#ifdef CONFIG_HOTPLUG_CPU +/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ +void fixup_irqs(void) +{ + unsigned int irq; + static int warned; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + int break_affinity = 0; + int set_affinity = 1; + const struct cpumask *affinity; + + if (!desc) + continue; + if (irq == 2) + continue; + + /* interrupt's are disabled at this point */ + spin_lock(&desc->lock); + + affinity = desc->affinity; + if (!irq_has_action(irq) || + cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); + continue; + } + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + break_affinity = 1; + affinity = cpu_all_mask; + } + + if (desc->chip->mask) + desc->chip->mask(irq); + + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, affinity); + else if (!(warned++)) + set_affinity = 0; + + if (desc->chip->unmask) + desc->chip->unmask(irq); + + spin_unlock(&desc->lock); + + if (break_affinity && set_affinity) + printk("Broke affinity for irq %i\n", irq); + else if (!set_affinity) + printk("Cannot set affinity for irq %i\n", irq); + } + + /* That doesn't seem sufficient. Give it 1ms. */ + local_irq_enable(); + mdelay(1); + local_irq_disable(); +} +#endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0f..10709f2 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } - -#ifdef CONFIG_HOTPLUG_CPU - -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - affinity = desc->affinity; - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - printk("Breaking affinity for irq %i\n", irq); - affinity = cpu_all_mask; - } - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (desc->action) - printk_once("Cannot set affinity for irq %i\n", irq); - } - -#if 0 - barrier(); - /* Ingo Molnar says: "after the IO-APIC masks have been redirected - [note the nop - the interrupt-enable boundary on x86 is two - instructions from sti] - to flush out pending hardirqs and - IPIs. After this point nothing is supposed to reach this CPU." */ - __asm__ __volatile__("sti; nop; cli"); - barrier(); -#else - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -#endif -} -#endif - diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b4..acf8fbf 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } -#ifdef CONFIG_HOTPLUG_CPU -/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ -void fixup_irqs(void) -{ - unsigned int irq; - static int warned; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - int break_affinity = 0; - int set_affinity = 1; - const struct cpumask *affinity; - - if (!desc) - continue; - if (irq == 2) - continue; - - /* interrupt's are disabled at this point */ - spin_lock(&desc->lock); - - affinity = desc->affinity; - if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { - spin_unlock(&desc->lock); - continue; - } - - if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { - break_affinity = 1; - affinity = cpu_all_mask; - } - - if (desc->chip->mask) - desc->chip->mask(irq); - - if (desc->chip->set_affinity) - desc->chip->set_affinity(irq, affinity); - else if (!(warned++)) - set_affinity = 0; - - if (desc->chip->unmask) - desc->chip->unmask(irq); - - spin_unlock(&desc->lock); - - if (break_affinity && set_affinity) - printk("Broke affinity for irq %i\n", irq); - else if (!set_affinity) - printk("Cannot set affinity for irq %i\n", irq); - } - - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); -} -#endif extern void call_softirq(void); -- cgit v0.10.2 From 84e21493a3b28c9fefe99fe827fc0c0c101a813d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:32 -0800 Subject: x86, intr-remap: Avoid irq_chip mask/unmask in fixup_irqs() for intr-remapping In the presence of interrupt-remapping, irqs will be migrated in the process context and we don't do (and there is no need to) irq_chip mask/unmask while migrating the interrupt. Similarly fix the fixup_irqs() that get called during cpu offline and avoid calling irq_chip mask/unmask for irqs that are ok to be migrated in the process context. While we didn't observe any race condition with the existing code, this change takes complete advantage of interrupt-remapping in the newer generation platforms and avoids any potential HW lockup's (that often worry Eric :) Signed-off-by: Suresh Siddha Acked-by: Eric W. Biederman Cc: garyhade@us.ibm.com LKML-Reference: <20091026230001.661423939@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3ea6655..342bcbc 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -310,7 +310,7 @@ void fixup_irqs(void) affinity = cpu_all_mask; } - if (desc->chip->mask) + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) desc->chip->mask(irq); if (desc->chip->set_affinity) @@ -318,7 +318,7 @@ void fixup_irqs(void) else if (!(warned++)) set_affinity = 0; - if (desc->chip->unmask) + if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) desc->chip->unmask(irq); spin_unlock(&desc->lock); -- cgit v0.10.2 From 23359a88e7eca3c4f402562b102f23014db3c2aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:33 -0800 Subject: x86: Remove move_cleanup_count from irq_cfg move_cleanup_count for each irq in irq_cfg is keeping track of the total number of cpus that need to free the corresponding vectors associated with the irq which has now been migrated to new destination. As long as this move_cleanup_count is non-zero (i.e., as long as we have n't freed the vector allocations on the old destinations) we were preventing the irq's further migration. This cleanup count is unnecessary and it is enough to not allow the irq migration till we send the cleanup vector to the previous irq destination, for which we already have irq_cfg's move_in_progress. All we need to make sure is that we free the vector at the old desintation but we don't need to wait till that gets freed. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.752968906@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 1984ce9..6e12426 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -94,7 +94,6 @@ struct irq_cfg { struct irq_pin_list *irq_2_pin; cpumask_var_t domain; cpumask_var_t old_domain; - unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; }; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ce16b65..e9e5b02 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1161,7 +1161,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int cpu, err; cpumask_var_t tmp_mask; - if ((cfg->move_in_progress) || cfg->move_cleanup_count) + if (cfg->move_in_progress) return -EBUSY; if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) @@ -2234,14 +2234,10 @@ void send_cleanup_vector(struct irq_cfg *cfg) if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } @@ -2430,8 +2426,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; @@ -2449,7 +2443,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) goto unlock; } __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; unlock: spin_unlock(&desc->lock); } -- cgit v0.10.2 From a5e74b841930bec78a4684ab9f208b2ddfe7c736 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:34 -0800 Subject: x86: Force irq complete move during cpu offline When a cpu goes offline, fixup_irqs() try to move irq's currently destined to the offline cpu to a new cpu. But this attempt will fail if the irq is recently moved to this cpu and the irq still hasn't arrived at this cpu (for non intr-remapping platforms this is when we free the vector allocation at the previous destination) that is about to go offline. This will endup with the interrupt subsystem still pointing the irq to the offline cpu, causing that irq to not work any more. Fix this by forcing the irq to complete its move (its been a long time we moved the irq to this cpu which we are offlining now) and then move this irq to a new cpu before this cpu goes offline. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.848830905@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index ddda6cb..ffd700f 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -34,6 +34,7 @@ static inline int irq_canonicalize(int irq) #ifdef CONFIG_HOTPLUG_CPU #include extern void fixup_irqs(void); +extern void irq_force_complete_move(int); #endif extern void (*generic_interrupt_extension)(void); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e9e5b02..4e886ef 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2450,21 +2450,33 @@ unlock: irq_exit(); } -static void irq_complete_move(struct irq_desc **descp) +static void __irq_complete_move(struct irq_desc **descp, unsigned vector) { struct irq_desc *desc = *descp; struct irq_cfg *cfg = desc->chip_data; - unsigned vector, me; + unsigned me; if (likely(!cfg->move_in_progress)) return; - vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); } + +static void irq_complete_move(struct irq_desc **descp) +{ + __irq_complete_move(descp, ~get_irq_regs()->orig_ax); +} + +void irq_force_complete_move(int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + + __irq_complete_move(&desc, cfg->vector); +} #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 342bcbc..b10a5e1 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -305,6 +305,13 @@ void fixup_irqs(void) continue; } + /* + * Complete the irq move. This cpu is going down and for + * non intr-remapping case, we can't wait till this interrupt + * arrives at this cpu before completing the irq move. + */ + irq_force_complete_move(irq); + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; affinity = cpu_all_mask; -- cgit v0.10.2 From b3ec0a37a7907813bb4fb85a2d94102c152470b7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:35 -0800 Subject: x86: Use EOI register in io-apic on intel platforms IO-APIC's in intel chipsets support EOI register starting from IO-APIC version 2. Use that when ever we need to clear the IO-APIC RTE's RemoteIRR bit explicitly. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.947855317@sbs-t61.sc.intel.com> [ Marked use_eio_reg as __read_mostly, fixed small details ] Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4e886ef..31e9db3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2492,6 +2492,51 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; +static int use_eoi_reg __read_mostly; + +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } +} + +static void eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static int ioapic_supports_eoi(void) +{ + struct pci_dev *root; + + root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + if (root && root->vendor == PCI_VENDOR_ID_INTEL && + mp_ioapics[0].apicver >= 0x2) { + use_eoi_reg = 1; + printk(KERN_INFO "IO-APIC supports EOI register\n"); + } else + printk(KERN_INFO "IO-APIC doesn't support EOI\n"); + + return 0; +} + +fs_initcall(ioapic_supports_eoi); + static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2575,37 +2620,19 @@ static void ack_apic_level(unsigned int irq) /* Tail end of version 0x11 I/O APIC bug workaround */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); + + if (use_eoi_reg) + eoi_ioapic_irq(desc); + else { + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } } } #ifdef CONFIG_INTR_REMAP -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, cfg->irq_2_pin) - io_apic_eoi(entry->apic, entry->pin); -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - static void ir_ack_apic_edge(unsigned int irq) { ack_APIC_irq(); -- cgit v0.10.2 From 5231a68614b94f60e8f6c56bc6e3d75955b9e75e Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:36 -0800 Subject: x86: Remove local_irq_enable()/local_irq_disable() in fixup_irqs() To ensure that we handle all the pending interrupts (destined for this cpu that is going down) in the interrupt subsystem before the cpu goes offline, fixup_irqs() does: local_irq_enable(); mdelay(1); local_irq_disable(); Enabling interrupts is not a good thing as this cpu is already offline. So this patch replaces that logic with, mdelay(1); check APIC_IRR bits Retrigger the irq at the new destination if any interrupt has arrived via IPI. For IO-APIC level triggered interrupts, this retrigger IPI will appear as an edge interrupt. ack_apic_level() will detect this condition and IO-APIC RTE's remoteIRR is cleared using directed EOI(using IO-APIC EOI register) on Intel platforms and for others it uses the existing mask+edge logic followed by unmask+level. We can also remove mdelay() and then send spuriuous interrupts to new cpu targets for all the irqs that were handled previously by this cpu that is going offline. While it works, I have seen spurious interrupt messages (nothing wrong but still annoying messages during cpu offline, which can be seen during suspend/resume etc) Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230002.043281924@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b10a5e1..8a82728 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -281,7 +281,7 @@ EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { - unsigned int irq; + unsigned int irq, vector; static int warned; struct irq_desc *desc; @@ -336,9 +336,33 @@ void fixup_irqs(void) printk("Cannot set affinity for irq %i\n", irq); } - /* That doesn't seem sufficient. Give it 1ms. */ - local_irq_enable(); + /* + * We can remove mdelay() and then send spuriuous interrupts to + * new cpu targets for all the irqs that were handled previously by + * this cpu. While it works, I have seen spurious interrupt messages + * (nothing wrong but still...). + * + * So for now, retain mdelay(1) and check the IRR and then send those + * interrupts to new targets as this cpu is already offlined... + */ mdelay(1); - local_irq_disable(); + + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irr; + + if (__get_cpu_var(vector_irq)[vector] < 0) + continue; + + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1 << (vector % 32))) { + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + spin_lock(&desc->lock); + if (desc->chip->retrigger) + desc->chip->retrigger(irq); + spin_unlock(&desc->lock); + } + } } #endif -- cgit v0.10.2 From c5e0cb3ddc5f14cedcfc50c0fb3b5fc6b56576da Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 28 Oct 2009 08:14:48 -0700 Subject: rcu: Cleanup: balance rcu_irq_enter()/rcu_irq_exit() calls Currently, rcu_irq_exit() is invoked only for CONFIG_NO_HZ, while rcu_irq_enter() is invoked unconditionally. This patch moves rcu_irq_exit() out from under CONFIG_NO_HZ so that the calls are balanced. This patch has no effect on the behavior of the kernel because both rcu_irq_enter() and rcu_irq_exit() are empty for !CONFIG_NO_HZ, but the code is easier to understand if the calls are obviously balanced in all cases. Signed-off-by: Lai Jiangshan Signed-off-by: Paul E. McKenney Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12567428891605-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/softirq.c b/kernel/softirq.c index f8749e5..21939d9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -302,9 +302,9 @@ void irq_exit(void) if (!in_interrupt() && local_softirq_pending()) invoke_softirq(); + rcu_irq_exit(); #ifdef CONFIG_NO_HZ /* Make sure that timer wheel updates are propagated */ - rcu_irq_exit(); if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) tick_nohz_stop_sched_tick(0); #endif -- cgit v0.10.2 From afb7b4f08e274cecd8337f9444affa288a9cd4c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Oct 2009 16:28:23 -0200 Subject: perf tools: Factor out the map initialization Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256927305-4628-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 3064a05..4a158a0 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -105,6 +105,8 @@ struct symbol; typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); +void map__init(struct map *self, u64 start, u64 end, u64 pgoff, + struct dso *dso); struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, unsigned int sym_priv_size); struct map *map__clone(struct map *self); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index d302e51..3b7ce1b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -20,6 +20,18 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) return n; } +void map__init(struct map *self, u64 start, u64 end, u64 pgoff, + struct dso *dso) +{ + self->start = start; + self->end = end; + self->pgoff = pgoff; + self->dso = dso; + self->map_ip = map__map_ip; + self->unmap_ip = map__unmap_ip; + RB_CLEAR_NODE(&self->rb_node); +} + struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, unsigned int sym_priv_size) { @@ -28,6 +40,7 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, if (self != NULL) { const char *filename = event->filename; char newfilename[PATH_MAX]; + struct dso *dso; int anon; if (cwd) { @@ -47,20 +60,15 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, filename = newfilename; } - self->start = event->start; - self->end = event->start + event->len; - self->pgoff = event->pgoff; - - self->dso = dsos__findnew(filename, sym_priv_size); - if (self->dso == NULL) + dso = dsos__findnew(filename, sym_priv_size); + if (dso == NULL) goto out_delete; + map__init(self, event->start, event->start + event->len, + event->pgoff, dso); + if (self->dso == vdso || anon) self->map_ip = self->unmap_ip = identity__map_ip; - else { - self->map_ip = map__map_ip; - self->unmap_ip = map__unmap_ip; - } } return self; out_delete: diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0273d83..13677b5 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1132,18 +1132,12 @@ static struct map *map__new2(u64 start, struct dso *dso) struct map *self = malloc(sizeof(*self)); if (self != NULL) { - self->start = start; /* - * Will be filled after we load all the symbols + * ->end will be filled after we load all the symbols */ - self->end = 0; - - self->pgoff = 0; - self->dso = dso; - self->map_ip = map__map_ip; - self->unmap_ip = map__unmap_ip; - RB_CLEAR_NODE(&self->rb_node); + map__init(self, start, 0, 0, dso); } + return self; } -- cgit v0.10.2 From 00a192b395b0606ad0265243844b3cd68e73420a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Oct 2009 16:28:24 -0200 Subject: perf tools: Simplify the symbol priv area mechanism Before we were storing this in the DSO, but in fact this is a property of the 'symbol' class, not something that will vary among DSOs, so move it to a global variable and initialize it using the existing symbol__init routine. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256927305-4628-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 8688bfe..77d50a6 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -55,11 +55,11 @@ struct sym_priv { static const char *sym_hist_filter; -static int symbol_filter(struct map *map, struct symbol *sym) +static int symbol_filter(struct map *map __used, struct symbol *sym) { if (sym_hist_filter == NULL || strcmp(sym->name, sym_hist_filter) == 0) { - struct sym_priv *priv = dso__sym_priv(map->dso, sym); + struct sym_priv *priv = symbol__priv(sym); const int size = (sizeof(*priv->hist) + (sym->end - sym->start) * sizeof(u64)); @@ -92,7 +92,7 @@ static void hist_hit(struct hist_entry *he, u64 ip) if (!sym || !he->map) return; - priv = dso__sym_priv(he->map->dso, sym); + priv = symbol__priv(sym); if (!priv->hist) return; @@ -202,8 +202,7 @@ got_map: static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct map *map = map__new(&event->mmap, NULL, 0, - sizeof(struct sym_priv)); + struct map *map = map__new(&event->mmap, NULL, 0); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", @@ -355,7 +354,7 @@ static int parse_line(FILE *file, struct hist_entry *he, u64 len) unsigned int hits = 0; double percent = 0.0; const char *color; - struct sym_priv *priv = dso__sym_priv(he->map->dso, sym); + struct sym_priv *priv = symbol__priv(sym); struct sym_ext *sym_ext = priv->ext; struct sym_hist *h = priv->hist; @@ -422,7 +421,7 @@ static void insert_source_line(struct sym_ext *sym_ext) static void free_source_line(struct hist_entry *he, int len) { - struct sym_priv *priv = dso__sym_priv(he->map->dso, he->sym); + struct sym_priv *priv = symbol__priv(he->sym); struct sym_ext *sym_ext = priv->ext; int i; @@ -446,7 +445,7 @@ get_source_line(struct hist_entry *he, int len, const char *filename) int i; char cmd[PATH_MAX * 2]; struct sym_ext *sym_ext; - struct sym_priv *priv = dso__sym_priv(he->map->dso, sym); + struct sym_priv *priv = symbol__priv(sym); struct sym_hist *h = priv->hist; if (!h->sum) @@ -589,7 +588,7 @@ static void find_annotations(void) if (he->sym == NULL) continue; - priv = dso__sym_priv(he->map->dso, he->sym); + priv = symbol__priv(he->sym); if (priv->hist == NULL) continue; @@ -637,7 +636,7 @@ static int __cmd_annotate(void) exit(0); } - if (load_kernel(sizeof(struct sym_priv), symbol_filter) < 0) { + if (load_kernel(symbol_filter) < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; } @@ -769,7 +768,7 @@ static void setup_sorting(void) int cmd_annotate(int argc, const char **argv, const char *prefix __used) { - symbol__init(); + symbol__init(sizeof(struct sym_priv)); page_size = getpagesize(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f1bcd35..1a806d5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -751,7 +751,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) static int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct map *map = map__new(&event->mmap, cwd, cwdlen, 0); + struct map *map = map__new(&event->mmap, cwd, cwdlen); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", @@ -1093,7 +1093,7 @@ static void setup_list(struct strlist **list, const char *list_str, int cmd_report(int argc, const char **argv, const char *prefix __used) { - symbol__init(); + symbol__init(0); argc = parse_options(argc, argv, options, report_usage, 0); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 9a48d96..df44b75 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1937,7 +1937,7 @@ static int __cmd_record(int argc, const char **argv) int cmd_sched(int argc, const char **argv, const char *prefix __used) { - symbol__init(); + symbol__init(0); argc = parse_options(argc, argv, sched_options, sched_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 0a2f222..665877e 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1266,7 +1266,7 @@ static const struct option options[] = { int cmd_timechart(int argc, const char **argv, const char *prefix __used) { - symbol__init(); + symbol__init(0); page_size = getpagesize(); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ee87640..2aea913 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -789,7 +789,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) strstr(name, "_text_end")) return 1; - syme = dso__sym_priv(map->dso, sym); + syme = symbol__priv(sym); syme->map = map; pthread_mutex_init(&syme->source_lock, NULL); if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) @@ -807,8 +807,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) static int parse_symbols(void) { - if (dsos__load_kernel(vmlinux_name, sizeof(struct sym_entry), - symbol_filter, 1) <= 0) + if (dsos__load_kernel(vmlinux_name, symbol_filter, 1) <= 0) return -1; if (dump_symtab) @@ -859,7 +858,7 @@ static void event__process_sample(const event_t *self, int counter) return; } - syme = dso__sym_priv(map->dso, sym); + syme = symbol__priv(sym); if (!syme->skip) { syme->count[counter]++; @@ -878,8 +877,7 @@ static void event__process_mmap(event_t *self) struct thread *thread = threads__findnew(self->mmap.pid); if (thread != NULL) { - struct map *map = map__new(&self->mmap, NULL, 0, - sizeof(struct sym_entry)); + struct map *map = map__new(&self->mmap, NULL, 0); if (map != NULL) thread__insert_map(thread, map); } @@ -1176,7 +1174,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; - symbol__init(); + symbol__init(sizeof(struct sym_entry)); page_size = sysconf(_SC_PAGE_SIZE); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e566bbe..d042d65 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -151,7 +151,7 @@ static const struct option options[] = { int cmd_trace(int argc, const char **argv, const char *prefix __used) { - symbol__init(); + symbol__init(0); argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index 18accb8..c458db9 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -130,7 +130,7 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, if (curr_handler->sample_type_check(sample_type) < 0) exit(-1); - if (load_kernel(0, NULL) < 0) { + if (load_kernel(NULL) < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 4a158a0..0a443be 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -107,8 +107,7 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); void map__init(struct map *self, u64 start, u64 end, u64 pgoff, struct dso *dso); -struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, - unsigned int sym_priv_size); +struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 3b7ce1b..679011c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -32,8 +32,7 @@ void map__init(struct map *self, u64 start, u64 end, u64 pgoff, RB_CLEAR_NODE(&self->rb_node); } -struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, - unsigned int sym_priv_size) +struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen) { struct map *self = malloc(sizeof(*self)); @@ -60,7 +59,7 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen, filename = newfilename; } - dso = dsos__findnew(filename, sym_priv_size); + dso = dsos__findnew(filename); if (dso == NULL) goto out_delete; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 13677b5..cf2c7f7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -26,6 +26,7 @@ static void dsos__add(struct dso *dso); static struct dso *dsos__find(const char *name); static struct map *map__new2(u64 start, struct dso *dso); static void kernel_maps__insert(struct map *map); +unsigned int symbol__priv_size; static struct rb_root kernel_maps; @@ -75,18 +76,17 @@ static void kernel_maps__fixup_end(void) } } -static struct symbol *symbol__new(u64 start, u64 len, const char *name, - unsigned int priv_size) +static struct symbol *symbol__new(u64 start, u64 len, const char *name) { size_t namelen = strlen(name) + 1; - struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); - + struct symbol *self = calloc(1, (symbol__priv_size + + sizeof(*self) + namelen)); if (!self) return NULL; - if (priv_size) { - memset(self, 0, priv_size); - self = ((void *)self) + priv_size; + if (symbol__priv_size) { + memset(self, 0, symbol__priv_size); + self = ((void *)self) + symbol__priv_size; } self->start = start; self->end = len ? start + len - 1 : start; @@ -98,9 +98,9 @@ static struct symbol *symbol__new(u64 start, u64 len, const char *name, return self; } -static void symbol__delete(struct symbol *self, unsigned int priv_size) +static void symbol__delete(struct symbol *self) { - free(((void *)self) - priv_size); + free(((void *)self) - symbol__priv_size); } static size_t symbol__fprintf(struct symbol *self, FILE *fp) @@ -109,7 +109,7 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp) self->start, self->end, self->name); } -struct dso *dso__new(const char *name, unsigned int sym_priv_size) +struct dso *dso__new(const char *name) { struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); @@ -118,7 +118,6 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size) self->long_name = self->name; self->short_name = self->name; self->syms = RB_ROOT; - self->sym_priv_size = sym_priv_size; self->find_symbol = dso__find_symbol; self->slen_calculated = 0; self->origin = DSO__ORIG_NOT_FOUND; @@ -136,7 +135,7 @@ static void dso__delete_symbols(struct dso *self) pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); rb_erase(&pos->rb_node, &self->syms); - symbol__delete(pos, self->sym_priv_size); + symbol__delete(pos); } } @@ -250,8 +249,7 @@ static int kernel_maps__load_all_kallsyms(void) /* * Will fix up the end later, when we have all symbols sorted. */ - sym = symbol__new(start, 0, symbol_name, - kernel_map->dso->sym_priv_size); + sym = symbol__new(start, 0, symbol_name); if (sym == NULL) goto out_delete_line; @@ -317,8 +315,7 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) snprintf(dso_name, sizeof(dso_name), "[kernel].%d", kernel_range++); - dso = dso__new(dso_name, - kernel_map->dso->sym_priv_size); + dso = dso__new(dso_name); if (dso == NULL) return -1; @@ -336,7 +333,7 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) if (filter && filter(map, pos)) { delete_symbol: rb_erase(&pos->rb_node, &kernel_map->dso->syms); - symbol__delete(pos, kernel_map->dso->sym_priv_size); + symbol__delete(pos); } else { if (map != kernel_map) { rb_erase(&pos->rb_node, &kernel_map->dso->syms); @@ -417,14 +414,13 @@ static int dso__load_perf_map(struct dso *self, struct map *map, if (len + 2 >= line_len) continue; - sym = symbol__new(start, size, line + len, - self->sym_priv_size); + sym = symbol__new(start, size, line + len); if (sym == NULL) goto out_delete_line; if (filter && filter(map, sym)) - symbol__delete(sym, self->sym_priv_size); + symbol__delete(sym); else { dso__insert_symbol(self, sym); nr_syms++; @@ -616,7 +612,7 @@ static int dso__synthesize_plt_symbols(struct dso *self) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size); + sympltname); if (!f) goto out_elf_end; @@ -634,7 +630,7 @@ static int dso__synthesize_plt_symbols(struct dso *self) "%s@plt", elf_sym__name(&sym, symstrs)); f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size); + sympltname); if (!f) goto out_elf_end; @@ -769,7 +765,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, if (kmodule) start += map->start + shdr.sh_offset; - curr_dso = dso__new(dso_name, self->sym_priv_size); + curr_dso = dso__new(dso_name); if (curr_dso == NULL) goto out_elf_end; curr_map = map__new2(start, curr_dso); @@ -803,14 +799,13 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, if (demangled != NULL) elf_name = demangled; new_symbol: - f = symbol__new(sym.st_value, sym.st_size, elf_name, - curr_dso->sym_priv_size); + f = symbol__new(sym.st_value, sym.st_size, elf_name); free(demangled); if (!f) goto out_elf_end; if (filter && filter(curr_map, f)) - symbol__delete(f, curr_dso->sym_priv_size); + symbol__delete(f); else { dso__insert_symbol(curr_dso, f); nr++; @@ -1141,7 +1136,7 @@ static struct map *map__new2(u64 start, struct dso *dso) return self; } -static int dsos__load_modules(unsigned int sym_priv_size) +static int dsos__load_modules(void) { char *line = NULL; size_t n; @@ -1180,7 +1175,7 @@ static int dsos__load_modules(unsigned int sym_priv_size) *sep = '\0'; snprintf(name, sizeof(name), "[%s]", line); - dso = dso__new(name, sym_priv_size); + dso = dso__new(name); if (dso == NULL) goto out_delete_line; @@ -1224,11 +1219,11 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return err; } -int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, - symbol_filter_t filter, int use_modules) +int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, + int use_modules) { int err = -1; - struct dso *dso = dso__new(vmlinux, sym_priv_size); + struct dso *dso = dso__new(vmlinux); if (dso == NULL) return -1; @@ -1240,7 +1235,7 @@ int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; - if (use_modules && dsos__load_modules(sym_priv_size) < 0) { + if (use_modules && dsos__load_modules() < 0) { pr_warning("Failed to load list of modules in use! " "Continuing...\n"); use_modules = 0; @@ -1312,12 +1307,12 @@ static struct dso *dsos__find(const char *name) return NULL; } -struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size) +struct dso *dsos__findnew(const char *name) { struct dso *dso = dsos__find(name); if (!dso) { - dso = dso__new(name, sym_priv_size); + dso = dso__new(name); if (dso != NULL) dsos__add(dso); } @@ -1333,13 +1328,12 @@ void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -int load_kernel(unsigned int sym_priv_size, symbol_filter_t filter) +int load_kernel(symbol_filter_t filter) { - if (dsos__load_kernel(vmlinux_name, sym_priv_size, filter, - modules) <= 0) + if (dsos__load_kernel(vmlinux_name, filter, modules) <= 0) return -1; - vdso = dso__new("[vdso]", 0); + vdso = dso__new("[vdso]"); if (!vdso) return -1; @@ -1348,7 +1342,8 @@ int load_kernel(unsigned int sym_priv_size, symbol_filter_t filter) return 0; } -void symbol__init(void) +void symbol__init(unsigned int priv_size) { elf_version(EV_CURRENT); + symbol__priv_size = priv_size; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 432edbc..a471a38 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -39,11 +39,17 @@ struct symbol { char name[0]; }; +extern unsigned int symbol__priv_size; + +static inline void *symbol__priv(struct symbol *self) +{ + return ((void *)self) - symbol__priv_size; +} + struct dso { struct list_head node; struct rb_root syms; struct symbol *(*find_symbol)(struct dso *, u64 ip); - unsigned int sym_priv_size; unsigned char adjust_symbols; unsigned char slen_calculated; bool loaded; @@ -53,28 +59,22 @@ struct dso { char name[0]; }; -struct dso *dso__new(const char *name, unsigned int sym_priv_size); +struct dso *dso__new(const char *name); void dso__delete(struct dso *self); -static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) -{ - return ((void *)sym) - self->sym_priv_size; -} - struct symbol *dso__find_symbol(struct dso *self, u64 ip); -int dsos__load_kernel(const char *vmlinux, unsigned int sym_priv_size, - symbol_filter_t filter, int modules); -struct dso *dsos__findnew(const char *name, unsigned int sym_priv_size); +int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, int modules); +struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); void dsos__fprintf(FILE *fp); size_t dso__fprintf(struct dso *self, FILE *fp); char dso__symtab_origin(const struct dso *self); -int load_kernel(unsigned int sym_priv_size, symbol_filter_t filter); +int load_kernel(symbol_filter_t filter); -void symbol__init(void); +void symbol__init(unsigned int priv_size); extern struct list_head dsos; extern struct map *kernel_map; -- cgit v0.10.2 From d70a5402f9c2e2671b809363616b3508b4c5a565 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 30 Oct 2009 16:28:25 -0200 Subject: perf tools: Improve message about missing symtabs for deleted DSOs Instead of: no symbols found in /usr/lib/gstreamer-0.10/libgsttypefindfunctions.so (deleted), maybe install a debug package? no symbols found in /usr/lib/gstreamer-0.10/libgstaudioconvert.so (deleted), maybe install a debug package? We now emit: /usr/lib/gstreamer-0.10/libgsttypefindfunctions.so was updated, restart the long running apps that use it! /usr/lib/gstreamer-0.10/libgstaudioconvert.so was updated, restart the long running apps that use it! Which is far less misleading about what the cause of the symbol mismatch is. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256927305-4628-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 679011c..f1e2169 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -75,6 +75,8 @@ out_delete: return NULL; } +#define DSO__DELETED "(deleted)" + struct symbol * map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter) { @@ -86,8 +88,18 @@ map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter) self->dso->long_name); return NULL; } else if (nr == 0) { - pr_warning("No symbols found in %s, maybe install a debug package?\n", - self->dso->long_name); + const char *name = self->dso->long_name; + const size_t len = strlen(name); + const size_t real_len = len - sizeof(DSO__DELETED); + + if (len > sizeof(DSO__DELETED) && + strcmp(name + real_len + 1, DSO__DELETED) == 0) + pr_warning("%.*s was updated, restart the " + "long running apps that use it!\n", + real_len, name); + else + pr_warning("no symbols found in %s, maybe " + "install a debug package?\n", name); return NULL; } } -- cgit v0.10.2 From 4dae560f97fa438f373b53e14b30149c9e44a600 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Fri, 30 Oct 2009 19:23:10 +0530 Subject: kprobes: Sanitize struct kretprobe_instance allocations For as long as kretprobes have existed, we've allocated NR_CPUS instances of kretprobe_instance structures. With the default value of CONFIG_NR_CPUS increasing on certain architectures, we are potentially wasting kernel memory. See http://sourceware.org/bugzilla/show_bug.cgi?id=10839#c3 for more details. Use a saner num_possible_cpus() instead of NR_CPUS for allocation. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Masami Hiramatsu Cc: Jim Keniston Cc: fweisbec@gmail.com LKML-Reference: <20091030135310.GA22230@in.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5240d75..1494e85 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1014,9 +1014,9 @@ int __kprobes register_kretprobe(struct kretprobe *rp) /* Pre-allocate memory for max kretprobe instances */ if (rp->maxactive <= 0) { #ifdef CONFIG_PREEMPT - rp->maxactive = max(10, 2 * NR_CPUS); + rp->maxactive = max(10, 2 * num_possible_cpus()); #else - rp->maxactive = NR_CPUS; + rp->maxactive = num_possible_cpus(); #endif } spin_lock_init(&rp->lock); -- cgit v0.10.2 From 900b20d5900045fb9b48f2fb3d80cbdbae3f44c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Nov 2009 19:25:25 +0100 Subject: perf tools: Fix missing symtabs printouts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: util/map.c: In function ‘map__find_symbol’: util/map.c:97: error: field precision should have type ‘int’, but argument 3 has type ‘size_t’ Also clean up some line wrap damage - we dont line-wrap printk messages. Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1256927305-4628-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index f1e2169..33f8684 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -93,13 +93,12 @@ map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter) const size_t real_len = len - sizeof(DSO__DELETED); if (len > sizeof(DSO__DELETED) && - strcmp(name + real_len + 1, DSO__DELETED) == 0) - pr_warning("%.*s was updated, restart the " - "long running apps that use it!\n", - real_len, name); - else - pr_warning("no symbols found in %s, maybe " - "install a debug package?\n", name); + strcmp(name + real_len + 1, DSO__DELETED) == 0) { + pr_warning("%.*s was updated, restart the long running apps that use it!\n", + (int)real_len, name); + } else { + pr_warning("no symbols found in %s, maybe install a debug package?\n", name); + } return NULL; } } -- cgit v0.10.2 From 31bde71c202722a76686c3cf69a254c8a912275a Mon Sep 17 00:00:00 2001 From: Matt Domsch Date: Tue, 3 Nov 2009 12:05:50 +1100 Subject: tpm: autoload tpm_tis based on system PnP IDs The tpm_tis driver already has a list of supported pnp_device_ids. This patch simply exports that list as a MODULE_DEVICE_TABLE() so that the module autoloader will discover and load the module at boottime. Signed-off-by: Matt Domsch Acked-by: Rajiv Andrade Signed-off-by: Andrew Morton Signed-off-by: James Morris diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 27e8de4..2405f17 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -637,6 +637,7 @@ static struct pnp_device_id tpm_pnp_tbl[] __devinitdata = { {"", 0}, /* User Specified */ {"", 0} /* Terminator */ }; +MODULE_DEVICE_TABLE(pnp, tpm_pnp_tbl); static __devexit void tpm_tis_pnp_remove(struct pnp_dev *dev) { -- cgit v0.10.2 From a489ca355efaf9efa4990b0f8f30ab650a206273 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 2 Nov 2009 16:59:15 -0800 Subject: x86: Make sure we also print a Code: line for show_regs() show_regs() is called as a mini BUG() equivalent in some places, specifically for the "scheduling while atomic" case. Unfortunately right now it does not print a Code: line unlike a real bug/oops. This patch changes the x86 implementation of show_regs() so that it calls the same function as oopses do to print the registers as well as the Code: line. Signed-off-by: Arjan van de Ven LKML-Reference: <20091102165915.4a980fc0@infradead.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf7956..e658331 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -187,7 +187,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, ®s->sp, regs->bp); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad535b6..2386999 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -226,8 +226,7 @@ void __show_regs(struct pt_regs *regs, int all) void show_regs(struct pt_regs *regs) { - printk(KERN_INFO "CPU %d:", smp_processor_id()); - __show_regs(regs, 1); + show_registers(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } -- cgit v0.10.2 From 12e4db4790b1bd2b7ec70eb2a1386c00fc683740 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2009 11:29:07 -0200 Subject: perf probe: Annotate variable initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Annotate away this false positive warning on older GCCs: cc1: warnings being treated as errors builtin-probe.c: In function ‘parse_probe_event’: builtin-probe.c:72: warning: ‘nc’ is used uninitialized in this function Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Masami Hiramatsu LKML-Reference: <1257254947-16789-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index a99a366..8124523 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -69,7 +69,7 @@ static struct { static void parse_probe_point(char *arg, struct probe_point *pp) { char *ptr, *tmp; - char c, nc; + char c, nc = 0; /* * * perf probe SRC:LN -- cgit v0.10.2 From fb0459d75c1d0a4ba3cafdd2c754e7486968a676 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 25 Sep 2009 12:25:56 +0200 Subject: perf/core: Provide a kernel-internal interface to get to performance counters There are reasons for kernel code to ask for, and use, performance counters. For example, in CPU freq governors this tends to be a good idea, but there are other examples possible as well of course. This patch adds the needed bits to do enable this functionality; they have been tested in an experimental cpufreq driver that I'm working on, and the changes are all that I needed to access counters properly. [fweisbec@gmail.com: added pid to perf_event_create_kernel_counter so that we can profile a particular task too TODO: Have a better error reporting, don't just return NULL in fail case.] v2: Remove the wrong comment about the fact perf_event_create_kernel_counter must be called from a kernel thread. Signed-off-by: Arjan van de Ven Acked-by: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt Cc: Jan Kiszka Cc: Avi Kivity LKML-Reference: <20090925122556.2f8bd939@infradead.org> Signed-off-by: Frederic Weisbecker diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index df9d964..fa151d4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -744,6 +744,12 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, int cpu); extern void perf_event_update_userpage(struct perf_event *event); +extern int perf_event_release_kernel(struct perf_event *event); +extern struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, + int cpu, + pid_t pid); +extern u64 perf_event_read_value(struct perf_event *event); struct perf_sample_data { u64 type; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 12b5ec3..02d4ff0 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1725,6 +1725,26 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +int perf_event_release_kernel(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_event_remove_from_context(event); + mutex_unlock(&ctx->mutex); + + mutex_lock(&event->owner->perf_event_mutex); + list_del_init(&event->owner_entry); + mutex_unlock(&event->owner->perf_event_mutex); + put_task_struct(event->owner); + + free_event(event); + + return 0; +} +EXPORT_SYMBOL_GPL(perf_event_release_kernel); + static int perf_event_read_size(struct perf_event *event) { int entry = sizeof(u64); /* value */ @@ -1750,7 +1770,7 @@ static int perf_event_read_size(struct perf_event *event) return size; } -static u64 perf_event_read_value(struct perf_event *event) +u64 perf_event_read_value(struct perf_event *event) { struct perf_event *child; u64 total = 0; @@ -1761,6 +1781,7 @@ static u64 perf_event_read_value(struct perf_event *event) return total; } +EXPORT_SYMBOL_GPL(perf_event_read_value); static int perf_event_read_entry(struct perf_event *event, u64 read_format, char __user *buf) @@ -4638,6 +4659,58 @@ err_put_context: return err; } +/** + * perf_event_create_kernel_counter + * + * @attr: attributes of the counter to create + * @cpu: cpu in which the counter is bound + * @pid: task to profile + */ +struct perf_event * +perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, + pid_t pid) +{ + struct perf_event *event; + struct perf_event_context *ctx; + int err; + + /* + * Get the target context (task or percpu): + */ + + ctx = find_get_context(pid, cpu); + if (IS_ERR(ctx)) + return NULL ; + + event = perf_event_alloc(attr, cpu, ctx, NULL, + NULL, GFP_KERNEL); + err = PTR_ERR(event); + if (IS_ERR(event)) + goto err_put_context; + + event->filp = NULL; + WARN_ON_ONCE(ctx->parent_ctx); + mutex_lock(&ctx->mutex); + perf_install_in_context(ctx, event, cpu); + ++ctx->generation; + mutex_unlock(&ctx->mutex); + + event->owner = current; + get_task_struct(current); + mutex_lock(¤t->perf_event_mutex); + list_add_tail(&event->owner_entry, ¤t->perf_event_list); + mutex_unlock(¤t->perf_event_mutex); + + return event; + +err_put_context: + if (err < 0) + put_ctx(ctx); + + return NULL; +} +EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); + /* * inherit a event from parent task to child task: */ -- cgit v0.10.2 From 41a48d14f6991020c9bb6b93e289ca5b411ed09a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 5 Oct 2009 19:23:06 +0900 Subject: x86/hw-breakpoints: Actually flush thread breakpoints in flush_thread(). flush_thread() tries to do a TIF_DEBUG check before calling in to flush_thread_hw_breakpoint() (which subsequently clears the thread flag), but for some reason, the x86 code is manually clearing TIF_DEBUG immediately before the test, so this path will never be taken. This kills off the erroneous clear_tsk_thread_flag() and lets flush_thread_hw_breakpoint() actually get invoked. Presumably folks were getting lucky with testing and the free_thread_info() -> free_thread_xstate() path was taking care of the flush there. Signed-off-by: Paul Mundt Acked-by: "K.Prasad" Cc: Ingo Molnar Cc: Alan Stern LKML-Reference: <20091005102306.GA7889@linux-sh.org> Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2275ce5..cf8ee00 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -107,8 +107,6 @@ void flush_thread(void) } #endif - clear_tsk_thread_flag(tsk, TIF_DEBUG); - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) flush_thread_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); -- cgit v0.10.2 From c1e530178540df26eb39f10a972d06f96302ceb4 Mon Sep 17 00:00:00 2001 From: Thiago Farina Date: Tue, 3 Nov 2009 08:28:45 -0500 Subject: perf: Clean up trivial style issues in builtin-help.c Pointed out by checkpatch. Signed-off-by: Thiago Farina Cc: a.p.zijlstra@chello.nl Cc: paulus@samba.org Cc: Valdis.Kletnieks@vt.edu LKML-Reference: <1257254925-5423-1-git-send-email-tfransosi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 4fb8734..768f9c8 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -61,8 +61,7 @@ static const char *get_man_viewer_info(const char *name) { struct man_viewer_info_list *viewer; - for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) - { + for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) { if (!strcasecmp(name, viewer->name)) return viewer->info; } @@ -115,7 +114,7 @@ static int check_emacsclient_version(void) return 0; } -static void exec_woman_emacs(const char* path, const char *page) +static void exec_woman_emacs(const char *path, const char *page) { if (!check_emacsclient_version()) { /* This works only with emacsclient version >= 22. */ @@ -129,7 +128,7 @@ static void exec_woman_emacs(const char* path, const char *page) } } -static void exec_man_konqueror(const char* path, const char *page) +static void exec_man_konqueror(const char *path, const char *page) { const char *display = getenv("DISPLAY"); if (display && *display) { @@ -157,7 +156,7 @@ static void exec_man_konqueror(const char* path, const char *page) } } -static void exec_man_man(const char* path, const char *page) +static void exec_man_man(const char *path, const char *page) { if (!path) path = "man"; @@ -364,9 +363,8 @@ static void show_man_page(const char *perf_cmd) setup_man_path(); for (viewer = man_viewer_list; viewer; viewer = viewer->next) - { exec_viewer(viewer->name, page); /* will return when unable */ - } + if (fallback) exec_viewer(fallback, page); exec_viewer("man", page); -- cgit v0.10.2 From 6d7aa9d721c8c640066142fd9534afcdf68d7f9d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2009 15:52:18 -0200 Subject: perf symbols: Initialize dso->loaded Brown paper bag bug introduced in: 66bd8424cc05e800db384053bf7ab967e4658468 ("perf tools: Delay loading symtabs till we hit a map with it") Without this we were not loading any symtabs that happened to be on a DSO for which the allocated memory for ->loaded was !0. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1257270738-5669-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cf2c7f7..93c49f4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -121,6 +121,7 @@ struct dso *dso__new(const char *name) self->find_symbol = dso__find_symbol; self->slen_calculated = 0; self->origin = DSO__ORIG_NOT_FOUND; + self->loaded = false; } return self; -- cgit v0.10.2 From 97eaf5300b9d0cd99c310bf8c4a0f2f3296d88a3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 18 Oct 2009 15:33:50 +0200 Subject: perf/core: Add a callback to perf events A simple callback in a perf event can be used for multiple purposes. For example it is useful for triggered based events like hardware breakpoints that need a callback to dispatch a triggered breakpoint event. v2: Simplify a bit the callback attribution as suggested by Paul Mackerras Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: "K.Prasad" Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Paul Mackerras Cc: Mike Galbraith Cc: Paul Mundt diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fa151d4..8d54e6d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -544,6 +544,8 @@ struct perf_pending_entry { void (*func)(struct perf_pending_entry *); }; +typedef void (*perf_callback_t)(struct perf_event *, void *); + /** * struct perf_event - performance event kernel representation: */ @@ -639,6 +641,8 @@ struct perf_event { struct event_filter *filter; #endif + perf_callback_t callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -748,7 +752,8 @@ extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid); + pid_t pid, + perf_callback_t callback); extern u64 perf_event_read_value(struct perf_event *event); struct perf_sample_data { diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 02d4ff0..5087125 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4293,6 +4293,7 @@ perf_event_alloc(struct perf_event_attr *attr, struct perf_event_context *ctx, struct perf_event *group_leader, struct perf_event *parent_event, + perf_callback_t callback, gfp_t gfpflags) { const struct pmu *pmu; @@ -4335,6 +4336,11 @@ perf_event_alloc(struct perf_event_attr *attr, event->state = PERF_EVENT_STATE_INACTIVE; + if (!callback && parent_event) + callback = parent_event->callback; + + event->callback = callback; + if (attr->disabled) event->state = PERF_EVENT_STATE_OFF; @@ -4611,7 +4617,7 @@ SYSCALL_DEFINE5(perf_event_open, } event = perf_event_alloc(&attr, cpu, ctx, group_leader, - NULL, GFP_KERNEL); + NULL, NULL, GFP_KERNEL); err = PTR_ERR(event); if (IS_ERR(event)) goto err_put_context; @@ -4668,7 +4674,7 @@ err_put_context: */ struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid) + pid_t pid, perf_callback_t callback) { struct perf_event *event; struct perf_event_context *ctx; @@ -4683,7 +4689,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, return NULL ; event = perf_event_alloc(attr, cpu, ctx, NULL, - NULL, GFP_KERNEL); + NULL, callback, GFP_KERNEL); err = PTR_ERR(event); if (IS_ERR(event)) goto err_put_context; @@ -4736,7 +4742,7 @@ inherit_event(struct perf_event *parent_event, child_event = perf_event_alloc(&parent_event->attr, parent_event->cpu, child_ctx, group_leader, parent_event, - GFP_KERNEL); + NULL, GFP_KERNEL); if (IS_ERR(child_event)) return child_event; get_ctx(child_ctx); -- cgit v0.10.2 From 97829de5a3b88899c5f3ac8802d11868bf4180ba Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 3 Nov 2009 14:02:05 -0500 Subject: x86, 64-bit: Fix bstep_iret jump This jump should be unconditional. Signed-off-by: Brian Gerst LKML-Reference: <1257274925-15713-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index af0f4b22..1579a6c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1501,7 +1501,7 @@ error_kernelspace: bstep_iret: /* Fix truncated RIP */ movq %rcx,RIP+8(%rsp) - je error_swapgs + jmp error_swapgs END(error_entry) -- cgit v0.10.2 From 1477b6a7edd9ffa7bba4f9779ce9a76ce92761ed Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:14:16 +0900 Subject: sched: Remove unused __schedule() declaration __schedule() had been removed. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF129C8.3030008@ct.jp.nec.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60..f18102c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,7 +349,6 @@ extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); -asmlinkage void __schedule(void); asmlinkage void schedule(void); extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 9147a31..7d70146 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks) /* * All threads that don't have debuggerinfo should be - * in __schedule() sleeping, since all other CPUs + * in schedule() sleeping, since all other CPUs * are in kgdb_wait, and thus have debuggerinfo. */ if (local_debuggerinfo) { -- cgit v0.10.2 From 2a2bb3142d326bb28b03875cabfc49baaac9a14a Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:16:10 +0900 Subject: sched: Remove unused time_sync_thresh declaration time_sync_thresh had been removed. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF12A3A.5050200@ct.jp.nec.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index f18102c..754b3de 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -171,8 +171,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) } #endif -extern unsigned long long time_sync_thresh; - /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). -- cgit v0.10.2 From 9824a2b728b63e7ff586b9fd9293c819be79f0f3 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 16:16:54 +0900 Subject: sched: Remove unused cpu_nr_migrations() cpu_nr_migrations() is not used, remove it. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4AF12A66.6020609@ct.jp.nec.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 754b3de..dfc21fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(void); -extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/sched.c b/kernel/sched.c index 67be4d0..30fd0ba 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -541,7 +541,6 @@ struct rq { struct load_weight load; unsigned long nr_load_updates; u64 nr_switches; - u64 nr_migrations_in; struct cfs_rq cfs; struct rt_rq rt; @@ -2049,7 +2048,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) #endif if (old_cpu != new_cpu) { p->se.nr_migrations++; - new_rq->nr_migrations_in++; #ifdef CONFIG_SCHEDSTATS if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, se.nr_forced2_migrations); @@ -2989,15 +2987,6 @@ static void calc_load_account_active(struct rq *this_rq) } /* - * Externally visible per-cpu scheduler statistics: - * cpu_nr_migrations(cpu) - number of migrations into that cpu - */ -u64 cpu_nr_migrations(int cpu) -{ - return cpu_rq(cpu)->nr_migrations_in; -} - -/* * Update rq->cpu_load[] statistics. This function is usually called every * scheduler tick (TICK_NSEC). */ -- cgit v0.10.2 From 2643ce11457a99a85c5bed8dd631e35968e6ca5a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 3 Nov 2009 21:46:10 -0200 Subject: perf symbols: Factor out buildid reading routine So that we can run it without having a DSO instance. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith LKML-Reference: <1257291970-8208-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ac94d7b..e7c7cdb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -825,27 +825,27 @@ out_close: return err; } -#define BUILD_ID_SIZE 128 +#define BUILD_ID_SIZE 20 -static char *dso__read_build_id(struct dso *self) +int filename__read_build_id(const char *filename, void *bf, size_t size) { - int i; + int fd, err = -1; GElf_Ehdr ehdr; GElf_Shdr shdr; Elf_Data *build_id_data; Elf_Scn *sec; - char *build_id = NULL, *bid; - unsigned char *raw; Elf *elf; - int fd = open(self->long_name, O_RDONLY); + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); if (fd < 0) goto out; elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { - pr_err("%s: cannot read %s ELF file.\n", __func__, - self->long_name); + pr_err("%s: cannot read %s ELF file.\n", __func__, filename); goto out_close; } @@ -854,29 +854,46 @@ static char *dso__read_build_id(struct dso *self) goto out_elf_end; } - sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL); + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note.gnu.build-id", NULL); if (sec == NULL) goto out_elf_end; build_id_data = elf_getdata(sec, NULL); if (build_id_data == NULL) goto out_elf_end; - build_id = malloc(BUILD_ID_SIZE); + memcpy(bf, build_id_data->d_buf + 16, BUILD_ID_SIZE); + err = BUILD_ID_SIZE; +out_elf_end: + elf_end(elf); +out_close: + close(fd); +out: + return err; +} + +static char *dso__read_build_id(struct dso *self) +{ + int i, len; + char *build_id = NULL, *bid; + unsigned char rawbf[BUILD_ID_SIZE], *raw; + + len = filename__read_build_id(self->long_name, rawbf, sizeof(rawbf)); + if (len < 0) + goto out; + + build_id = malloc(len * 2 + 1); if (build_id == NULL) - goto out_elf_end; - raw = build_id_data->d_buf + 16; + goto out; bid = build_id; - for (i = 0; i < 20; ++i) { + raw = rawbf; + for (i = 0; i < len; ++i) { sprintf(bid, "%02x", *raw); ++raw; bid += 2; } pr_debug2("%s(%s): %s\n", __func__, self->long_name, build_id); -out_elf_end: - elf_end(elf); -out_close: - close(fd); out: return build_id; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0884330..e0d4a58 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -82,6 +82,8 @@ void dsos__fprintf(FILE *fp); size_t dso__fprintf(struct dso *self, FILE *fp); char dso__symtab_origin(const struct dso *self); +int filename__read_build_id(const char *filename, void *bf, size_t size); + int load_kernel(symbol_filter_t filter); void symbol__init(unsigned int priv_size); -- cgit v0.10.2 From 45a5c8bad827ebb9c9798becc15bce2e804d49e0 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Wed, 4 Nov 2009 03:15:44 +0530 Subject: sched: Add USER_SCHED to feature removal list Peter Zijlstra suggested that we remove USER_SCHED at: http://lkml.org/lkml/2009/3/21/67 Removing USER_SCHED removes a lot of code from the scheduler and simplifies the code. We already have the ability to do user based classification which is tightened using PAM in userspace. Schedule USER_SCHED for removal in 2.6.34 Signed-off-by: Dhaval Giani Acked-by: Peter Zijlstra Cc: Balbir Singh Cc: Bharata B Rao Cc: Serge E. Hallyn Cc: Srivatsa Vaddagiri LKML-Reference: <20091103214544.GI5495@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index bc693ff..f613df8 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,21 @@ be removed from this file. --------------------------- +What: USER_SCHED +When: 2.6.34 + +Why: USER_SCHED was implemented as a proof of concept for group scheduling. + The effect of USER_SCHED can already be achieved from userspace with + the help of libcgroup. The removal of USER_SCHED will also simplify + the scheduler code with the removal of one major ifdef. There are also + issues USER_SCHED has with USER_NS. A decision was taken not to fix + those and instead remove USER_SCHED. Also new group scheduling + features will not be implemented for USER_SCHED. + +Who: Dhaval Giani + +--------------------------- + What: PRISM54 When: 2.6.34 -- cgit v0.10.2 From c43f9d1e61e265c6bfafdd65c7f07c8d71a7efc3 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Nov 2009 19:12:13 -0500 Subject: perf/probes: Update Documentation/perf-probe.txt Update Documentation/perf-probe.txt accoding to recent syntax changes. Signed-off-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091104001212.3454.19415.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 6b6c6ae..9270594 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -8,7 +8,9 @@ perf-probe - Define new dynamic tracepoints SYNOPSIS -------- [verse] -'perf probe' [-k ] -P 'PROBE' [-P 'PROBE' ...] +'perf probe' [options] --add 'PROBE' [--add 'PROBE' ...] +or +'perf probe' [options] 'PROBE' ['PROBE' ...] DESCRIPTION @@ -21,26 +23,25 @@ and C local variables) with debuginfo. OPTIONS ------- -k:: ---vmlinux:: +--vmlinux=PATH:: Specify vmlinux path which has debuginfo (Dwarf binary). -v:: --verbose:: Be more verbose (show parsed arguments, etc). --P:: ---probe:: +-a:: +--add:: Define a probe point (see PROBE SYNTAX for detail) PROBE SYNTAX ------------ Probe points are defined by following syntax. - "TYPE:[GRP/]NAME FUNC[+OFFS][@SRC]|@SRC:LINE [ARG ...]" + "FUNC[+OFFS|:RLN|%return][@SRC]|SRC:ALN [ARG ...]" -'TYPE' specifies the type of probe point, you can use either "p" (kprobe) or "r" (kretprobe) for 'TYPE'. 'GRP' specifies the group name of this probe, and 'NAME' specifies the event name. If 'GRP' is omitted, "kprobes" is used for its group name. -'FUNC' and 'OFFS' specifies function and offset (in byte) where probe will be put. In addition, 'SRC' specifies a source file which has that function (this is mainly for inline functions). -You can specify a probe point by the source line number by using '@SRC:LINE' syntax, where 'SRC' is the source file path and 'LINE' is the line number. +'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, 'RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. In addition, 'SRC' specifies a source file which has that function. +It is also possible to specify a probe point by the source line number by using 'SRC:ALN' syntax, where 'SRC' is the source file path and 'ALN' is the line number. 'ARG' specifies the arguments of this probe point. You can use the name of local variable, or kprobe-tracer argument format (e.g. $retval, %ax, etc). SEE ALSO -- cgit v0.10.2 From a7f4328b91fb6e71dbe1fa4d46f3597c9555014d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Nov 2009 19:12:21 -0500 Subject: perf/probes: Improve error messages Improve error messages in perf-probe so that users can figure out problems easily. Reported-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091104001221.3454.52030.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 8124523..65bcaed 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -294,10 +294,11 @@ static int write_new_event(int fd, const char *buf) { int ret; - printf("Adding new event: %s\n", buf); ret = write(fd, buf, strlen(buf)); if (ret <= 0) - die("failed to create event."); + die("Failed to create event."); + else + printf("Added new event: %s\n", buf); return ret; } @@ -310,7 +311,7 @@ static int synthesize_probe_event(struct probe_point *pp) int i, len, ret; pp->probes[0] = buf = (char *)calloc(MAX_CMDLEN, sizeof(char)); if (!buf) - die("calloc"); + die("Failed to allocate memory by calloc."); ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); if (ret <= 0 || ret >= MAX_CMDLEN) goto error; @@ -363,7 +364,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) if (ret == -E2BIG) semantic_error("probe point is too long."); else if (ret < 0) - die("snprintf"); + die("Failed to synthesize a probe point."); } #ifndef NO_LIBDWARF @@ -375,7 +376,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) else fd = open_default_vmlinux(); if (fd < 0) - die("vmlinux/module file open"); + die("Could not open vmlinux/module file."); /* Searching probe points */ for (j = 0; j < session.nr_probe; j++) { @@ -396,8 +397,13 @@ setup_probes: /* Settng up probe points */ snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); fd = open(buf, O_WRONLY, O_APPEND); - if (fd < 0) - die("kprobe_events open"); + if (fd < 0) { + if (errno == ENOENT) + die("kprobe_events file does not exist - please rebuild with CONFIG_KPROBE_TRACER."); + else + die("Could not open kprobe_events file: %s", + strerror(errno)); + } for (j = 0; j < session.nr_probe; j++) { pp = &session.probes[j]; if (pp->found == 1) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index db96186..35d5a69 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -688,7 +688,7 @@ int find_probepoint(int fd, struct probe_point *pp) ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error); if (ret != DW_DLV_OK) - die("Failed to call dwarf_init(). Maybe, not a dwarf file.\n"); + die("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO.\n"); pp->found = 0; while (++cu_number) { -- cgit v0.10.2 From a225a1d911f0e434dc0407df29fd08e4388f3fa4 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Nov 2009 19:12:30 -0500 Subject: perf/probes: Fall back to non-dwarf if possible Fall back to non-dwarf probe point if the probe definition may not need dwarf analysis, when perf can't find vmlinux/debuginfo. This might skip some inlined code of target function. Signed-off-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091104001229.3454.63987.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 65bcaed..d111a93 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -189,7 +189,7 @@ static void parse_probe_event(const char *str) /* Parse probe point */ parse_probe_point(argv[0], pp); free(argv[0]); - if (pp->file) + if (pp->file || pp->line) session.need_dwarf = 1; /* Copy arguments */ @@ -347,36 +347,24 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) if (session.nr_probe == 0) usage_with_options(probe_usage, options); -#ifdef NO_LIBDWARF if (session.need_dwarf) - semantic_error("Dwarf-analysis is not supported"); -#endif - - /* Synthesize probes without dwarf */ - for (j = 0; j < session.nr_probe; j++) { -#ifndef NO_LIBDWARF - if (!session.probes[j].retprobe) { - session.need_dwarf = 1; - continue; - } -#endif - ret = synthesize_probe_event(&session.probes[j]); - if (ret == -E2BIG) - semantic_error("probe point is too long."); - else if (ret < 0) - die("Failed to synthesize a probe point."); - } - -#ifndef NO_LIBDWARF - if (!session.need_dwarf) - goto setup_probes; +#ifdef NO_LIBDWARF + semantic_error("Debuginfo-analysis is not supported"); +#else /* !NO_LIBDWARF */ + pr_info("Some probes require debuginfo.\n"); if (session.vmlinux) fd = open(session.vmlinux, O_RDONLY); else fd = open_default_vmlinux(); - if (fd < 0) - die("Could not open vmlinux/module file."); + if (fd < 0) { + if (session.need_dwarf) + die("Could not open vmlinux/module file."); + + pr_warning("Could not open vmlinux/module file." + " Try to use symbols.\n"); + goto end_dwarf; + } /* Searching probe points */ for (j = 0; j < session.nr_probe; j++) { @@ -386,14 +374,34 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) lseek(fd, SEEK_SET, 0); ret = find_probepoint(fd, pp); - if (ret <= 0) - die("No probe point found.\n"); + if (ret < 0) { + if (session.need_dwarf) + die("Could not analyze debuginfo."); + + pr_warning("An error occurred in debuginfo analysis. Try to use symbols.\n"); + break; + } + if (ret == 0) /* No error but failed to find probe point. */ + die("No probe point found."); } close(fd); -setup_probes: +end_dwarf: #endif /* !NO_LIBDWARF */ + /* Synthesize probes without dwarf */ + for (j = 0; j < session.nr_probe; j++) { + pp = &session.probes[j]; + if (pp->found) /* This probe is already found. */ + continue; + + ret = synthesize_probe_event(pp); + if (ret == -E2BIG) + semantic_error("probe point is too long."); + else if (ret < 0) + die("Failed to synthesize a probe point."); + } + /* Settng up probe points */ snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); fd = open(buf, O_WRONLY, O_APPEND); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 35d5a69..293cdfc 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -687,8 +687,10 @@ int find_probepoint(int fd, struct probe_point *pp) struct probe_finder pf = {.pp = pp}; ret = dwarf_init(fd, DW_DLC_READ, 0, 0, &__dw_debug, &__dw_error); - if (ret != DW_DLV_OK) - die("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO.\n"); + if (ret != DW_DLV_OK) { + pr_warning("No dwarf info found in the vmlinux - please rebuild with CONFIG_DEBUG_INFO.\n"); + return -ENOENT; + } pp->found = 0; while (++cu_number) { -- cgit v0.10.2 From 91365bbe4f8c39a821f390f785d606304d6dee3c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Nov 2009 19:12:38 -0500 Subject: perf/probes: Rename perf probe events group name Rename the group name of perf probe events to 'probe'. Signed-off-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091104001238.3454.70508.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index d111a93..d78a3d9 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -52,7 +52,7 @@ const char *default_search_path[NR_SEARCH_PATH] = { #define MAX_PATH_LEN 256 #define MAX_PROBES 128 #define MAX_PROBE_ARGS 128 -#define PERFPROBE_GROUP "perfprobe" +#define PERFPROBE_GROUP "probe" /* Session management structure */ static struct { -- cgit v0.10.2 From 77b44d1b7c28360910cdbd427fb62d485c08674c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 3 Nov 2009 19:12:47 -0500 Subject: tracing/kprobes: Rename Kprobe-tracer to kprobe-event Rename Kprobes-based event tracer to kprobes-based tracing event (kprobe-event), since it is not a tracer but an extensible tracing event interface. This also changes CONFIG_KPROBE_TRACER to CONFIG_KPROBE_EVENT and sets it y by default. Signed-off-by: Masami Hiramatsu Acked-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju LKML-Reference: <20091104001247.3454.14131.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index 1541524..47aabee 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -1,26 +1,23 @@ - Kprobe-based Event Tracer - ========================= + Kprobe-based Event Tracing + ========================== Documentation is written by Masami Hiramatsu Overview -------- -This tracer is similar to the events tracer which is based on Tracepoint -infrastructure. Instead of Tracepoint, this tracer is based on kprobes(kprobe -and kretprobe). It probes anywhere where kprobes can probe(this means, all -functions body except for __kprobes functions). +These events are similar to tracepoint based events. Instead of Tracepoint, +this is based on kprobes (kprobe and kretprobe). So it can probe wherever +kprobes can probe (this means, all functions body except for __kprobes +functions). Unlike the Tracepoint based event, this can be added and removed +dynamically, on the fly. -Unlike the function tracer, this tracer can probe instructions inside of -kernel functions. It allows you to check which instruction has been executed. +To enable this feature, build your kernel with CONFIG_KPROBE_TRACING=y. -Unlike the Tracepoint based events tracer, this tracer can add and remove -probe points on the fly. - -Similar to the events tracer, this tracer doesn't need to be activated via -current_tracer, instead of that, just set probe points via -/sys/kernel/debug/tracing/kprobe_events. And you can set filters on each -probe events via /sys/kernel/debug/tracing/events/kprobes//filter. +Similar to the events tracer, this doesn't need to be activated via +current_tracer. Instead of that, add probe points via +/sys/kernel/debug/tracing/kprobe_events, and enable it via +/sys/kernel/debug/tracing/events/kprobes//enabled. Synopsis of kprobe_events @@ -55,9 +52,9 @@ Per-Probe Event Filtering ------------------------- Per-probe event filtering feature allows you to set different filter on each probe and gives you what arguments will be shown in trace buffer. If an event -name is specified right after 'p:' or 'r:' in kprobe_events, the tracer adds -an event under tracing/events/kprobes/, at the directory you can see -'id', 'enabled', 'format' and 'filter'. +name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event +under tracing/events/kprobes/, at the directory you can see 'id', +'enabled', 'format' and 'filter'. enabled: You can enable/disable the probe by writing 1 or 0 on it. @@ -71,6 +68,7 @@ filter: id: This shows the id of this probe event. + Event Profiling --------------- You can check the total number of probe hits and probe miss-hits via diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 15372a9..f056716 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -428,17 +428,22 @@ config BLK_DEV_IO_TRACE If unsure, say N. -config KPROBE_TRACER +config KPROBE_EVENT depends on KPROBES depends on X86 - bool "Trace kprobes" + bool "Enable kprobes-based dynamic events" select TRACING - select GENERIC_TRACER + default y help - This tracer probes everywhere where kprobes can probe it, and - records various registers and memories specified by user. - This also allows you to trace kprobe probe points as a dynamic - defined events. It provides per-probe event filtering interface. + This allows the user to add tracing events (similar to tracepoints) on the fly + via the ftrace interface. See Documentation/trace/kprobetrace.txt + for more details. + + Those events can be inserted wherever kprobes can probe, and record + various register and memory values. + + This option is also required by perf-probe subcommand of perf tools. If + you want to use perf tools, this option is strongly recommended. config DYNAMIC_FTRACE bool "enable/disable ftrace tracepoints dynamically" diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c8cb75d..edc3a3c 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -53,7 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_export.o obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o -obj-$(CONFIG_KPROBE_TRACER) += trace_kprobe.o +obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o obj-$(CONFIG_EVENT_TRACING) += power-traces.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a86c3ac..cf17a66 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1,5 +1,5 @@ /* - * kprobe based kernel tracer + * Kprobes-based tracing events * * Created by Masami Hiramatsu * @@ -57,8 +57,6 @@ const char *reserved_field_names[] = { FIELD_STRING_FUNC, }; -/* currently, trace_kprobe only supports X86. */ - struct fetch_func { unsigned long (*func)(struct pt_regs *, void *); void *data; @@ -191,7 +189,7 @@ static __kprobes void free_indirect_fetch_data(struct indirect_fetch_data *data) } /** - * Kprobe tracer core functions + * Kprobe event core functions */ struct probe_arg { -- cgit v0.10.2 From 09879b99d44d701c603935ef2549004405d7f8f9 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Wed, 4 Nov 2009 12:58:15 +0900 Subject: x86: Gitignore: arch/x86/lib/inat-tables.c Ignore generated file arch/x86/lib/inat-tables.c. Signed-off-by: Hiroshi Shimamoto Acked-by: Masami Hiramatsu LKML-Reference: <4AF0FBD7.7000501@ct.jp.nec.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/lib/.gitignore b/arch/x86/lib/.gitignore new file mode 100644 index 0000000..8df89f0 --- /dev/null +++ b/arch/x86/lib/.gitignore @@ -0,0 +1 @@ +inat-tables.c -- cgit v0.10.2 From e2c880630438f80b474378d5487b511b07665051 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:53:15 +1030 Subject: cpumask: Simplify sched_rt.c find_lowest_rq() wants to call pick_optimal_cpu() on the intersection of sched_domain_span(sd) and lowest_mask. Rather than doing a cpus_and into a temporary, we can open-code it. This actually makes the code slightly clearer, IMHO. Signed-off-by: Rusty Russell Acked-by: Gregory Haskins Cc: Steven Rostedt LKML-Reference: <200911031453.15350.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index a4d790c..5c5fef3 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1153,29 +1153,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); -static inline int pick_optimal_cpu(int this_cpu, - const struct cpumask *mask) -{ - int first; - - /* "this_cpu" is cheaper to preempt than a remote processor */ - if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) - return this_cpu; - - first = cpumask_first(mask); - if (first < nr_cpu_ids) - return first; - - return -1; -} - static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); - cpumask_var_t domain_mask; if (task->rt.nr_cpus_allowed == 1) return -1; /* No other targets possible */ @@ -1198,28 +1181,26 @@ static int find_lowest_rq(struct task_struct *task) * Otherwise, we consult the sched_domains span maps to figure * out which cpu is logically closest to our hot cache data. */ - if (this_cpu == cpu) - this_cpu = -1; /* Skip this_cpu opt if the same */ - - if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { - for_each_domain(cpu, sd) { - if (sd->flags & SD_WAKE_AFFINE) { - int best_cpu; + if (!cpumask_test_cpu(this_cpu, lowest_mask)) + this_cpu = -1; /* Skip this_cpu opt if not among lowest */ - cpumask_and(domain_mask, - sched_domain_span(sd), - lowest_mask); + for_each_domain(cpu, sd) { + if (sd->flags & SD_WAKE_AFFINE) { + int best_cpu; - best_cpu = pick_optimal_cpu(this_cpu, - domain_mask); - - if (best_cpu != -1) { - free_cpumask_var(domain_mask); - return best_cpu; - } - } + /* + * "this_cpu" is cheaper to preempt than a + * remote processor. + */ + if (this_cpu != -1 && + cpumask_test_cpu(this_cpu, sched_domain_span(sd))) + return this_cpu; + + best_cpu = cpumask_first_and(lowest_mask, + sched_domain_span(sd)); + if (best_cpu < nr_cpu_ids) + return best_cpu; } - free_cpumask_var(domain_mask); } /* @@ -1227,7 +1208,13 @@ static int find_lowest_rq(struct task_struct *task) * just give the caller *something* to work with from the compatible * locations. */ - return pick_optimal_cpu(this_cpu, lowest_mask); + if (this_cpu != -1) + return this_cpu; + + cpu = cpumask_any(lowest_mask); + if (cpu < nr_cpu_ids) + return cpu; + return -1; } /* Will lock the rq it finds */ -- cgit v0.10.2 From acc3f5d7cabbfd6cec71f0c1f9900621fa2d6ae7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:53:40 +1030 Subject: cpumask: Partition_sched_domains takes array of cpumask_var_t Currently partition_sched_domains() takes a 'struct cpumask *doms_new' which is a kmalloc'ed array of cpumask_t. You can't have such an array if 'struct cpumask' is undefined, as we plan for CONFIG_CPUMASK_OFFSTACK=y. So, we make this an array of cpumask_var_t instead: this is the same for the CONFIG_CPUMASK_OFFSTACK=n case, but requires multiple allocations for the CONFIG_CPUMASK_OFFSTACK=y case. Hence we add alloc_sched_domains() and free_sched_domains() functions. Signed-off-by: Rusty Russell Cc: Peter Zijlstra LKML-Reference: <200911031453.40668.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index dfc21fb..78ba664 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1009,9 +1009,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) return to_cpumask(sd->span); } -extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new); +/* Allocate an array of sched domains, for partition_sched_domains(). */ +cpumask_var_t *alloc_sched_domains(unsigned int ndoms); +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); + /* Test a flag in parent sched domain */ static inline int test_sd_parent(struct sched_domain *sd, int flag) { @@ -1029,7 +1033,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); struct sched_domain_attr; static inline void -partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d247381..3cf2183 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * element of the partition (one sched domain) to be passed to * partition_sched_domains(). */ -/* FIXME: see the FIXME in partition_sched_domains() */ -static int generate_sched_domains(struct cpumask **domains, +static int generate_sched_domains(cpumask_var_t **domains, struct sched_domain_attr **attributes) { LIST_HEAD(q); /* queue of cpusets to be scanned */ @@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains, struct cpuset **csa; /* array of all cpuset ptrs */ int csn; /* how many cpuset ptrs in csa so far */ int i, j, k; /* indices for partition finding loops */ - struct cpumask *doms; /* resulting partition; i.e. sched domains */ + cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ struct sched_domain_attr *dattr; /* attributes for custom domains */ int ndoms = 0; /* number of sched domains in result */ int nslot; /* next empty doms[] struct cpumask slot */ @@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains, /* Special case for the 99% of systems with one, full, sched domain */ if (is_sched_load_balance(&top_cpuset)) { - doms = kmalloc(cpumask_size(), GFP_KERNEL); + ndoms = 1; + doms = alloc_sched_domains(ndoms); if (!doms) goto done; @@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains, *dattr = SD_ATTR_INIT; update_domain_attr_tree(dattr, &top_cpuset); } - cpumask_copy(doms, top_cpuset.cpus_allowed); + cpumask_copy(doms[0], top_cpuset.cpus_allowed); - ndoms = 1; goto done; } @@ -636,7 +635,7 @@ restart: * Now we know how many domains to create. * Convert to and populate cpu masks. */ - doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL); + doms = alloc_sched_domains(ndoms); if (!doms) goto done; @@ -656,7 +655,7 @@ restart: continue; } - dp = doms + nslot; + dp = doms[nslot]; if (nslot == ndoms) { static int warnings = 10; @@ -718,7 +717,7 @@ done: static void do_rebuild_sched_domains(struct work_struct *unused) { struct sched_domain_attr *attr; - struct cpumask *doms; + cpumask_var_t *doms; int ndoms; get_online_cpus(); @@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, unsigned long phase, void *unused_cpu) { struct sched_domain_attr *attr; - struct cpumask *doms; + cpumask_var_t *doms; int ndoms; switch (phase) { diff --git a/kernel/sched.c b/kernel/sched.c index 30fd0ba..ae026aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8846,7 +8846,7 @@ static int build_sched_domains(const struct cpumask *cpu_map) return __build_sched_domains(cpu_map, NULL); } -static struct cpumask *doms_cur; /* current sched domains */ +static cpumask_var_t *doms_cur; /* current sched domains */ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ static struct sched_domain_attr *dattr_cur; /* attribues of custom domains in 'doms_cur' */ @@ -8868,6 +8868,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void) return 0; } +cpumask_var_t *alloc_sched_domains(unsigned int ndoms) +{ + int i; + cpumask_var_t *doms; + + doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); + if (!doms) + return NULL; + for (i = 0; i < ndoms; i++) { + if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { + free_sched_domains(doms, i); + return NULL; + } + } + return doms; +} + +void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) +{ + unsigned int i; + for (i = 0; i < ndoms; i++) + free_cpumask_var(doms[i]); + kfree(doms); +} + /* * Set up scheduler domains and groups. Callers must hold the hotplug lock. * For now this just excludes isolated cpus, but could be used to @@ -8879,12 +8904,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) arch_update_cpu_topology(); ndoms_cur = 1; - doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); + doms_cur = alloc_sched_domains(ndoms_cur); if (!doms_cur) - doms_cur = fallback_doms; - cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); + doms_cur = &fallback_doms; + cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); dattr_cur = NULL; - err = build_sched_domains(doms_cur); + err = build_sched_domains(doms_cur[0]); register_sched_domain_sysctl(); return err; @@ -8934,19 +8959,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * doms_new[] to the current sched domain partitioning, doms_cur[]. * It destroys each deleted domain and builds each new domain. * - * 'doms_new' is an array of cpumask's of length 'ndoms_new'. + * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. * The masks don't intersect (don't overlap.) We should setup one * sched domain for each mask. CPUs not in any of the cpumasks will * not be load balanced. If the same cpumask appears both in the * current 'doms_cur' domains and in the new 'doms_new', we can leave * it as it is. * - * The passed in 'doms_new' should be kmalloc'd. This routine takes - * ownership of it and will kfree it when done with it. If the caller - * failed the kmalloc call, then it can pass in doms_new == NULL && - * ndoms_new == 1, and partition_sched_domains() will fallback to - * the single partition 'fallback_doms', it also forces the domains - * to be rebuilt. + * The passed in 'doms_new' should be allocated using + * alloc_sched_domains. This routine takes ownership of it and will + * free_sched_domains it when done with it. If the caller failed the + * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, + * and partition_sched_domains() will fallback to the single partition + * 'fallback_doms', it also forces the domains to be rebuilt. * * If doms_new == NULL it will be replaced with cpu_online_mask. * ndoms_new == 0 is a special case for destroying existing domains, @@ -8954,8 +8979,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, * * Call with hotplug lock held */ -/* FIXME: Change to struct cpumask *doms_new[] */ -void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, +void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], struct sched_domain_attr *dattr_new) { int i, j, n; @@ -8974,40 +8998,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, /* Destroy deleted domains */ for (i = 0; i < ndoms_cur; i++) { for (j = 0; j < n && !new_topology; j++) { - if (cpumask_equal(&doms_cur[i], &doms_new[j]) + if (cpumask_equal(doms_cur[i], doms_new[j]) && dattrs_equal(dattr_cur, i, dattr_new, j)) goto match1; } /* no match - a current sched domain not in new doms_new[] */ - detach_destroy_domains(doms_cur + i); + detach_destroy_domains(doms_cur[i]); match1: ; } if (doms_new == NULL) { ndoms_cur = 0; - doms_new = fallback_doms; - cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); + doms_new = &fallback_doms; + cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); WARN_ON_ONCE(dattr_new); } /* Build new domains */ for (i = 0; i < ndoms_new; i++) { for (j = 0; j < ndoms_cur && !new_topology; j++) { - if (cpumask_equal(&doms_new[i], &doms_cur[j]) + if (cpumask_equal(doms_new[i], doms_cur[j]) && dattrs_equal(dattr_new, i, dattr_cur, j)) goto match2; } /* no match - add a new doms_new */ - __build_sched_domains(doms_new + i, + __build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); match2: ; } /* Remember the new sched domains */ - if (doms_cur != fallback_doms) - kfree(doms_cur); + if (doms_cur != &fallback_doms) + free_sched_domains(doms_cur, ndoms_cur); kfree(dattr_cur); /* kfree(NULL) is safe */ doms_cur = doms_new; dattr_cur = dattr_new; -- cgit v0.10.2 From ce7c42710e2dd133f10b7fc9ed9c73bdd2435f7a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 3 Nov 2009 14:53:52 +1030 Subject: cpumask: Avoid cpumask_t in arch/x86/kernel/apic/nmi.c Ingo wants the certainty of a static cpumask (rather than a cpumask_var_t), but cpumask_t will some day be undefined to avoid on-stack declarations. This is what DECLARE_BITMAP/to_cpumask() is for. Signed-off-by: Rusty Russell LKML-Reference: <200911031453.52394.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6..6389432 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,8 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_t backtrace_mask __read_mostly; +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -414,7 +415,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (cpumask_test_cpu(cpu, &backtrace_mask)) { + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); @@ -422,7 +423,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, &backtrace_mask); + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); rc = 1; } @@ -558,14 +559,14 @@ void arch_trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(&backtrace_mask, cpu_online_mask); + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); printk(KERN_INFO "sending NMI to all CPUs:\n"); apic->send_IPI_all(NMI_VECTOR); /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(&backtrace_mask)) + if (cpumask_empty(to_cpumask(backtrace_mask))) break; mdelay(1); } -- cgit v0.10.2 From 24b26d4211130b6455692804c14d537158855cd7 Mon Sep 17 00:00:00 2001 From: Liuweni Date: Wed, 4 Nov 2009 20:11:05 +0800 Subject: irq: Fix docbook comments Fix docbook comments to match the actual function names (set_irq_msi, handle_percpu_irq). Signed-off-by: Liuwenyi Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index c166019..ba566c2 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -166,11 +166,11 @@ int set_irq_data(unsigned int irq, void *data) EXPORT_SYMBOL(set_irq_data); /** - * set_irq_data - set irq type data for an irq + * set_irq_msi - set MSI descriptor data for an irq * @irq: Interrupt number * @entry: Pointer to MSI descriptor data * - * Set the hardware irq controller data for an irq + * Set the MSI descriptor entry for an irq */ int set_irq_msi(unsigned int irq, struct msi_desc *entry) { @@ -590,7 +590,7 @@ out_unlock: } /** - * handle_percpu_IRQ - Per CPU local irq handler + * handle_percpu_irq - Per CPU local irq handler * @irq: the interrupt number * @desc: the interrupt description structure for this irq * -- cgit v0.10.2 From 663e69592856df53ef52969482ef413a96bc4e06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 Nov 2009 14:22:21 +0100 Subject: irq: Remove unused debug_poll_all_shared_irqs() commit 74296a8ed added this function for debug purposes, but it was never used for anything. Remove it. Signed-off-by: Thomas Gleixner diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 7ca72b7..75f3f00 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -603,12 +603,6 @@ static inline void init_irq_proc(void) } #endif -#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ) -extern void debug_poll_all_shared_irqs(void); -#else -static inline void debug_poll_all_shared_irqs(void) { } -#endif - struct seq_file; int show_interrupts(struct seq_file *p, void *v); diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 114e704..8996b98 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -104,7 +104,7 @@ static int misrouted_irq(int irq) return ok; } -static void poll_all_shared_irqs(void) +static void poll_spurious_irqs(unsigned long dummy) { struct irq_desc *desc; int i; @@ -123,23 +123,11 @@ static void poll_all_shared_irqs(void) try_one_irq(i, desc); } -} - -static void poll_spurious_irqs(unsigned long dummy) -{ - poll_all_shared_irqs(); mod_timer(&poll_spurious_irq_timer, jiffies + POLL_SPURIOUS_IRQ_INTERVAL); } -#ifdef CONFIG_DEBUG_SHIRQ -void debug_poll_all_shared_irqs(void) -{ - poll_all_shared_irqs(); -} -#endif - /* * If 99,900 of the previous 100,000 interrupts have not been handled * then assume that the IRQ is stuck in some manner. Drop a diagnostic -- cgit v0.10.2 From 2a855dd01bc1539111adb7233f587c5c468732ac Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 25 Oct 2009 15:37:58 +0100 Subject: signal: Fix alternate signal stack check All architectures in the kernel increment/decrement the stack pointer before storing values on the stack. On architectures which have the stack grow down sas_ss_sp == sp is not on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is on the alternate signal stack. On architectures which have the stack grow up sas_ss_sp == sp is on the alternate signal stack while sas_ss_sp + sas_ss_size == sp is not on the alternate signal stack. The current implementation fails for architectures which have the stack grow down on the corner case where sas_ss_sp == sp.This was reported as Debian bug #544905 on AMD64. Simplified test case: http://download.breakpoint.cc/tc-sig-stack.c The test case creates the following stack scenario: 0xn0300 stack top 0xn0200 alt stack pointer top (when switching to alt stack) 0xn01ff alt stack end 0xn0100 alt stack start == stack pointer If the signal is sent the stack pointer is pointing to the base address of the alt stack and the kernel erroneously decides that it has already switched to the alternate stack because of the current check for "sp - sas_ss_sp < sas_ss_size" On parisc (stack grows up) the scenario would be: 0xn0200 stack pointer 0xn01ff alt stack end 0xn0100 alt stack start = alt stack pointer base (when switching to alt stack) 0xn0000 stack base This is handled correctly by the current implementation. [ tglx: Modified for archs which have the stack grow up (parisc) which would fail with the correct implementation for stack grows down. Added a check for sp >= current->sas_ss_sp which is strictly not necessary but makes the code symetric for both variants ] Signed-off-by: Sebastian Andrzej Siewior Cc: Oleg Nesterov Cc: Roland McGrath Cc: Kyle McMartin Cc: stable@kernel.org LKML-Reference: <20091025143758.GA6653@Chamillionaire.breakpoint.cc> Signed-off-by: Thomas Gleixner diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60..0f67914 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2086,11 +2086,18 @@ static inline int is_si_special(const struct siginfo *info) return info <= SEND_SIG_FORCED; } -/* True if we are on the alternate signal stack. */ - +/* + * True if we are on the alternate signal stack. + */ static inline int on_sig_stack(unsigned long sp) { - return (sp - current->sas_ss_sp < current->sas_ss_size); +#ifdef CONFIG_STACK_GROWSUP + return sp >= current->sas_ss_sp && + sp - current->sas_ss_sp < current->sas_ss_size; +#else + return sp > current->sas_ss_sp && + sp - current->sas_ss_sp <= current->sas_ss_size; +#endif } static inline int sas_ss_flags(unsigned long sp) -- cgit v0.10.2 From a1f84a3ab8e002159498814eaa7e48c33752b04b Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 27 Oct 2009 15:35:38 +0100 Subject: sched: Check for an idle shared cache in select_task_rq_fair() When waking affine, check for an idle shared cache, and if found, wake to that CPU/sibling instead of the waker's CPU. This improves pgsql+oltp ramp up by roughly 8%. Possibly more for other loads, depending on overlap. The trade-off is a roughly 1% peak downturn if tasks are truly synchronous. Signed-off-by: Mike Galbraith Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: LKML-Reference: <1256654138.17752.7.camel@marge.simson.net> Signed-off-by: Ingo Molnar diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 4e777b4..da87385 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1372,11 +1372,36 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag want_sd = 0; } - if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && - cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { + if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) { + int candidate = -1, i; - affine_sd = tmp; - want_affine = 0; + if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) + candidate = cpu; + + /* + * Check for an idle shared cache. + */ + if (tmp->flags & SD_PREFER_SIBLING) { + if (candidate == cpu) { + if (!cpu_rq(prev_cpu)->cfs.nr_running) + candidate = prev_cpu; + } + + if (candidate == -1 || candidate == cpu) { + for_each_cpu(i, sched_domain_span(tmp)) { + if (!cpu_rq(i)->cfs.nr_running) { + candidate = i; + break; + } + } + } + } + + if (candidate >= 0) { + affine_sd = tmp; + want_affine = 0; + cpu = candidate; + } } if (!want_sd && !want_affine) -- cgit v0.10.2 From 1b9508f6831e10d53256825de8904caa22d1ca2c Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 4 Nov 2009 17:53:50 +0100 Subject: sched: Rate-limit newidle Rate limit newidle to migration_cost. It's a win for all stages of sysbench oltp tests. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index ae026aa..f849212 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -589,6 +589,8 @@ struct rq { u64 rt_avg; u64 age_stamp; + u64 idle_stamp; + u64 avg_idle; #endif /* calc_load related fields */ @@ -2353,6 +2355,17 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, if (rq != orig_rq) update_rq_clock(rq); + if (rq->idle_stamp) { + u64 delta = rq->clock - rq->idle_stamp; + u64 max = 2*sysctl_sched_migration_cost; + + if (delta > max) + rq->avg_idle = max; + else + update_avg(&rq->avg_idle, delta); + rq->idle_stamp = 0; + } + WARN_ON(p->state != TASK_WAKING); cpu = task_cpu(p); @@ -4389,6 +4402,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq) int pulled_task = 0; unsigned long next_balance = jiffies + HZ; + this_rq->idle_stamp = this_rq->clock; + + if (this_rq->avg_idle < sysctl_sched_migration_cost) + return; + for_each_domain(this_cpu, sd) { unsigned long interval; @@ -4403,8 +4421,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) interval = msecs_to_jiffies(sd->balance_interval); if (time_after(next_balance, sd->last_balance + interval)) next_balance = sd->last_balance + interval; - if (pulled_task) + if (pulled_task) { + this_rq->idle_stamp = 0; break; + } } if (pulled_task || time_after(jiffies, this_rq->next_balance)) { /* diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index efb8440..6988cf0 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -285,12 +285,16 @@ static void print_cpu(struct seq_file *m, int cpu) #ifdef CONFIG_SCHEDSTATS #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); +#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n); P(yld_count); P(sched_switch); P(sched_count); P(sched_goidle); +#ifdef CONFIG_SMP + P64(avg_idle); +#endif P(ttwu_count); P(ttwu_local); -- cgit v0.10.2 From fd210738f6601d0fb462df9a2fe5a41896ff6a8f Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 5 Nov 2009 10:57:46 +0100 Subject: sched: Fix affinity logic in select_task_rq_fair() Ingo Molnar reported: [ 26.804000] BUG: using smp_processor_id() in preemptible [00000000] code: events/1/10 [ 26.808000] caller is vmstat_update+0x26/0x70 [ 26.812000] Pid: 10, comm: events/1 Not tainted 2.6.32-rc5 #6887 [ 26.816000] Call Trace: [ 26.820000] [] ? printk+0x28/0x3c [ 26.824000] [] debug_smp_processor_id+0xf0/0x110 [ 26.824000] mount used greatest stack depth: 1464 bytes left [ 26.828000] [] vmstat_update+0x26/0x70 [ 26.832000] [] worker_thread+0x188/0x310 [ 26.836000] [] ? worker_thread+0x127/0x310 [ 26.840000] [] ? autoremove_wake_function+0x0/0x60 [ 26.844000] [] ? worker_thread+0x0/0x310 [ 26.848000] [] kthread+0x7c/0x90 [ 26.852000] [] ? kthread+0x0/0x90 [ 26.856000] [] kernel_thread_helper+0x7/0x10 [ 26.860000] BUG: using smp_processor_id() in preemptible [00000000] code: events/1/10 [ 26.864000] caller is vmstat_update+0x3c/0x70 Because this commit: a1f84a3: sched: Check for an idle shared cache in select_task_rq_fair() broke ->cpus_allowed. Signed-off-by: Mike Galbraith Cc: Peter Zijlstra Cc: arjan@infradead.org Cc: LKML-Reference: <1257415066.12867.1.camel@marge.simson.net> Signed-off-by: Ingo Molnar diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index da87385..e4d4483 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1389,6 +1389,8 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag if (candidate == -1 || candidate == cpu) { for_each_cpu(i, sched_domain_span(tmp)) { + if (!cpumask_test_cpu(i, &p->cpus_allowed)) + continue; if (!cpu_rq(i)->cfs.nr_running) { candidate = i; break; -- cgit v0.10.2 From 2da3e160cb3d226d87b907fab26850d838ed8d7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Nov 2009 23:06:50 +0100 Subject: hw-breakpoint: Move asm-generic/hw_breakpoint.h to linux/hw_breakpoint.h We plan to make the breakpoints parameters generic among architectures. For that it's better to move the asm-generic header to a generic linux header. Signed-off-by: Frederic Weisbecker diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 1acb4d4..3cfca8e 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,7 +12,7 @@ struct arch_hw_breakpoint { }; #include -#include +#include /* Available HW breakpoint length encodings */ #define HW_BREAKPOINT_LEN_1 0x40 diff --git a/include/asm-generic/hw_breakpoint.h b/include/asm-generic/hw_breakpoint.h deleted file mode 100644 index 9bf2d12..0000000 --- a/include/asm-generic/hw_breakpoint.h +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef _ASM_GENERIC_HW_BREAKPOINT_H -#define _ASM_GENERIC_HW_BREAKPOINT_H - -#ifndef __ARCH_HW_BREAKPOINT_H -#error "Please don't include this file directly" -#endif - -#ifdef __KERNEL__ -#include -#include -#include - -/** - * struct hw_breakpoint - unified kernel/user-space hardware breakpoint - * @triggered: callback invoked after target address access - * @info: arch-specific breakpoint info (address, length, and type) - * - * %hw_breakpoint structures are the kernel's way of representing - * hardware breakpoints. These are data breakpoints - * (also known as "watchpoints", triggered on data access), and the breakpoint's - * target address can be located in either kernel space or user space. - * - * The breakpoint's address, length, and type are highly - * architecture-specific. The values are encoded in the @info field; you - * specify them when registering the breakpoint. To examine the encoded - * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared - * below. - * - * The address is specified as a regular kernel pointer (for kernel-space - * breakponts) or as an %__user pointer (for user-space breakpoints). - * With register_user_hw_breakpoint(), the address must refer to a - * location in user space. The breakpoint will be active only while the - * requested task is running. Conversely with - * register_kernel_hw_breakpoint(), the address must refer to a location - * in kernel space, and the breakpoint will be active on all CPUs - * regardless of the current task. - * - * The length is the breakpoint's extent in bytes, which is subject to - * certain limitations. include/asm/hw_breakpoint.h contains macros - * defining the available lengths for a specific architecture. Note that - * the address's alignment must match the length. The breakpoint will - * catch accesses to any byte in the range from address to address + - * (length - 1). - * - * The breakpoint's type indicates the sort of access that will cause it - * to trigger. Possible values may include: - * - * %HW_BREAKPOINT_RW (triggered on read or write access), - * %HW_BREAKPOINT_WRITE (triggered on write access), and - * %HW_BREAKPOINT_READ (triggered on read access). - * - * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all - * possibilities are available on all architectures. Execute breakpoints - * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. - * - * When a breakpoint gets hit, the @triggered callback is - * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the - * processor registers. - * Data breakpoints occur after the memory access has taken place. - * Breakpoints are disabled during execution @triggered, to avoid - * recursive traps and allow unhindered access to breakpointed memory. - * - * This sample code sets a breakpoint on pid_max and registers a callback - * function for writes to that variable. Note that it is not portable - * as written, because not all architectures support HW_BREAKPOINT_LEN_4. - * - * ---------------------------------------------------------------------- - * - * #include - * - * struct hw_breakpoint my_bp; - * - * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) - * { - * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); - * dump_stack(); - * ............... - * } - * - * static struct hw_breakpoint my_bp; - * - * static int init_module(void) - * { - * ...................... - * my_bp.info.type = HW_BREAKPOINT_WRITE; - * my_bp.info.len = HW_BREAKPOINT_LEN_4; - * - * my_bp.installed = (void *)my_bp_installed; - * - * rc = register_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * static void cleanup_module(void) - * { - * ...................... - * unregister_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * ---------------------------------------------------------------------- - */ -struct hw_breakpoint { - void (*triggered)(struct hw_breakpoint *, struct pt_regs *); - struct arch_hw_breakpoint info; -}; - -/* - * len and type values are defined in include/asm/hw_breakpoint.h. - * Available values vary according to the architecture. On i386 the - * possibilities are: - * - * HW_BREAKPOINT_LEN_1 - * HW_BREAKPOINT_LEN_2 - * HW_BREAKPOINT_LEN_4 - * HW_BREAKPOINT_RW - * HW_BREAKPOINT_READ - * - * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the - * 1-, 2-, and 4-byte lengths may be unavailable. There also may be - * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. - */ - -extern int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -/* - * Kernel breakpoints are not associated with any particular thread. - */ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); - -extern unsigned int hbp_kernel_pos; - -#endif /* __KERNEL__ */ -#endif /* _ASM_GENERIC_HW_BREAKPOINT_H */ diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h new file mode 100644 index 0000000..61ccc8f --- /dev/null +++ b/include/linux/hw_breakpoint.h @@ -0,0 +1,136 @@ +#ifndef _LINUX_HW_BREAKPOINT_H +#define _LINUX_HW_BREAKPOINT_H + + +#ifdef __KERNEL__ +#include +#include +#include + +/** + * struct hw_breakpoint - unified kernel/user-space hardware breakpoint + * @triggered: callback invoked after target address access + * @info: arch-specific breakpoint info (address, length, and type) + * + * %hw_breakpoint structures are the kernel's way of representing + * hardware breakpoints. These are data breakpoints + * (also known as "watchpoints", triggered on data access), and the breakpoint's + * target address can be located in either kernel space or user space. + * + * The breakpoint's address, length, and type are highly + * architecture-specific. The values are encoded in the @info field; you + * specify them when registering the breakpoint. To examine the encoded + * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared + * below. + * + * The address is specified as a regular kernel pointer (for kernel-space + * breakponts) or as an %__user pointer (for user-space breakpoints). + * With register_user_hw_breakpoint(), the address must refer to a + * location in user space. The breakpoint will be active only while the + * requested task is running. Conversely with + * register_kernel_hw_breakpoint(), the address must refer to a location + * in kernel space, and the breakpoint will be active on all CPUs + * regardless of the current task. + * + * The length is the breakpoint's extent in bytes, which is subject to + * certain limitations. include/asm/hw_breakpoint.h contains macros + * defining the available lengths for a specific architecture. Note that + * the address's alignment must match the length. The breakpoint will + * catch accesses to any byte in the range from address to address + + * (length - 1). + * + * The breakpoint's type indicates the sort of access that will cause it + * to trigger. Possible values may include: + * + * %HW_BREAKPOINT_RW (triggered on read or write access), + * %HW_BREAKPOINT_WRITE (triggered on write access), and + * %HW_BREAKPOINT_READ (triggered on read access). + * + * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all + * possibilities are available on all architectures. Execute breakpoints + * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. + * + * When a breakpoint gets hit, the @triggered callback is + * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the + * processor registers. + * Data breakpoints occur after the memory access has taken place. + * Breakpoints are disabled during execution @triggered, to avoid + * recursive traps and allow unhindered access to breakpointed memory. + * + * This sample code sets a breakpoint on pid_max and registers a callback + * function for writes to that variable. Note that it is not portable + * as written, because not all architectures support HW_BREAKPOINT_LEN_4. + * + * ---------------------------------------------------------------------- + * + * #include + * + * struct hw_breakpoint my_bp; + * + * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) + * { + * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); + * dump_stack(); + * ............... + * } + * + * static struct hw_breakpoint my_bp; + * + * static int init_module(void) + * { + * ...................... + * my_bp.info.type = HW_BREAKPOINT_WRITE; + * my_bp.info.len = HW_BREAKPOINT_LEN_4; + * + * my_bp.installed = (void *)my_bp_installed; + * + * rc = register_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * static void cleanup_module(void) + * { + * ...................... + * unregister_kernel_hw_breakpoint(&my_bp); + * ...................... + * } + * + * ---------------------------------------------------------------------- + */ +struct hw_breakpoint { + void (*triggered)(struct hw_breakpoint *, struct pt_regs *); + struct arch_hw_breakpoint info; +}; + +/* + * len and type values are defined in include/asm/hw_breakpoint.h. + * Available values vary according to the architecture. On i386 the + * possibilities are: + * + * HW_BREAKPOINT_LEN_1 + * HW_BREAKPOINT_LEN_2 + * HW_BREAKPOINT_LEN_4 + * HW_BREAKPOINT_RW + * HW_BREAKPOINT_READ + * + * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the + * 1-, 2-, and 4-byte lengths may be unavailable. There also may be + * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. + */ + +extern int register_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern int modify_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +extern void unregister_user_hw_breakpoint(struct task_struct *tsk, + struct hw_breakpoint *bp); +/* + * Kernel breakpoints are not associated with any particular thread. + */ +extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); + +extern unsigned int hbp_kernel_pos; + +#endif /* __KERNEL__ */ +#endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v0.10.2 From c426bba069e65ea438880a04aa4e7c5b880e1728 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Thu, 5 Nov 2009 09:31:31 +0900 Subject: perf bench: Add new directory and header for new subcommand 'bench' This patch adds bench/ directory and bench/bench.h. bench/ directory will contain modules for bench subcommand. bench/bench.h is for listing prototypes of module functions. Signed-off-by: Hitoshi Mitake Cc: Rusty Russell Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: fweisbec@gmail.com Cc: Jiri Kosina LKML-Reference: <1257381097-4743-2-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h new file mode 100644 index 0000000..59adb27 --- /dev/null +++ b/tools/perf/bench/bench.h @@ -0,0 +1,9 @@ +#ifndef BENCH_H +#define BENCH_H + +extern int bench_sched_messaging(int argc, const char **argv, + const char *prefix); +extern int bench_sched_pipe(int argc, const char **argv, + const char *prefix); + +#endif -- cgit v0.10.2 From e27454cc6352c4226ddc76f5e3a5dedd7dff456a Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Thu, 5 Nov 2009 09:31:32 +0900 Subject: perf bench: Add sched-messaging.c: Benchmark for scheduler and IPC mechanisms based on hackbench This patch adds bench/sched-messaging.c. This benchmark measures performance of scheduler and IPC mechanisms, and is based on hackbench by Rusty Russell. Example of usage: % perf bench sched messaging -g 20 -l 1000 -s 5.432 # in sec % perf bench sched messaging # run with default options (20 sender and receiver processes per group) (10 groups == 400 processes run) Total time:0.308 sec % perf bench sched messaging -t -g 20 # # be multi-thread, with 20 groups (20 sender and receiver threads per group) (20 groups == 800 threads run) Total time:0.582 sec ( Rusty is the original author of hackbench.c and he said the code is and was under the GPLv2 so fine to be merged. ) Signed-off-by: Hitoshi Mitake Acked-by: Rusty Russell Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: fweisbec@gmail.com Cc: Jiri Kosina LKML-Reference: <1257381097-4743-3-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c new file mode 100644 index 0000000..36b62c5 --- /dev/null +++ b/tools/perf/bench/sched-messaging.c @@ -0,0 +1,332 @@ +/* + * + * builtin-bench-messaging.c + * + * messaging: Benchmark for scheduler and IPC mechanisms + * + * Based on hackbench by Rusty Russell + * Ported to perf by Hitoshi Mitake + * + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../builtin.h" +#include "bench.h" + +/* Test groups of 20 processes spraying to 20 receivers */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DATASIZE 100 + +static int use_pipes = 0; +static unsigned int loops = 100; +static unsigned int thread_mode = 0; +static unsigned int num_groups = 10; +static int simple = 0; + +struct sender_context { + unsigned int num_fds; + int ready_out; + int wakefd; + int out_fds[0]; +}; + +struct receiver_context { + unsigned int num_packets; + int in_fds[2]; + int ready_out; + int wakefd; +}; + +static void barf(const char *msg) +{ + fprintf(stderr, "%s (error: %s)\n", msg, strerror(errno)); + exit(1); +} + +static void fdpair(int fds[2]) +{ + if (use_pipes) { + if (pipe(fds) == 0) + return; + } else { + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0) + return; + } + + barf(use_pipes ? "pipe()" : "socketpair()"); +} + +/* Block until we're ready to go */ +static void ready(int ready_out, int wakefd) +{ + char dummy; + struct pollfd pollfd = { .fd = wakefd, .events = POLLIN }; + + /* Tell them we're ready. */ + if (write(ready_out, &dummy, 1) != 1) + barf("CLIENT: ready write"); + + /* Wait for "GO" signal */ + if (poll(&pollfd, 1, -1) != 1) + barf("poll"); +} + +/* Sender sprays loops messages down each file descriptor */ +static void *sender(struct sender_context *ctx) +{ + char data[DATASIZE]; + unsigned int i, j; + + ready(ctx->ready_out, ctx->wakefd); + + /* Now pump to every receiver. */ + for (i = 0; i < loops; i++) { + for (j = 0; j < ctx->num_fds; j++) { + int ret, done = 0; + +again: + ret = write(ctx->out_fds[j], data + done, + sizeof(data)-done); + if (ret < 0) + barf("SENDER: write"); + done += ret; + if (done < DATASIZE) + goto again; + } + } + + return NULL; +} + + +/* One receiver per fd */ +static void *receiver(struct receiver_context* ctx) +{ + unsigned int i; + + if (!thread_mode) + close(ctx->in_fds[1]); + + /* Wait for start... */ + ready(ctx->ready_out, ctx->wakefd); + + /* Receive them all */ + for (i = 0; i < ctx->num_packets; i++) { + char data[DATASIZE]; + int ret, done = 0; + +again: + ret = read(ctx->in_fds[0], data + done, DATASIZE - done); + if (ret < 0) + barf("SERVER: read"); + done += ret; + if (done < DATASIZE) + goto again; + } + + return NULL; +} + +static pthread_t create_worker(void *ctx, void *(*func)(void *)) +{ + pthread_attr_t attr; + pthread_t childid; + int err; + + if (!thread_mode) { + /* process mode */ + /* Fork the receiver. */ + switch (fork()) { + case -1: + barf("fork()"); + break; + case 0: + (*func) (ctx); + exit(0); + break; + default: + break; + } + + return (pthread_t)0; + } + + if (pthread_attr_init(&attr) != 0) + barf("pthread_attr_init:"); + +#ifndef __ia64__ + if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0) + barf("pthread_attr_setstacksize"); +#endif + + err = pthread_create(&childid, &attr, func, ctx); + if (err != 0) { + fprintf(stderr, "pthread_create failed: %s (%d)\n", + strerror(err), err); + exit(-1); + } + return childid; +} + +static void reap_worker(pthread_t id) +{ + int proc_status; + void *thread_status; + + if (!thread_mode) { + /* process mode */ + wait(&proc_status); + if (!WIFEXITED(proc_status)) + exit(1); + } else { + pthread_join(id, &thread_status); + } +} + +/* One group of senders and receivers */ +static unsigned int group(pthread_t *pth, + unsigned int num_fds, + int ready_out, + int wakefd) +{ + unsigned int i; + struct sender_context *snd_ctx = malloc(sizeof(struct sender_context) + + num_fds * sizeof(int)); + + if (!snd_ctx) + barf("malloc()"); + + for (i = 0; i < num_fds; i++) { + int fds[2]; + struct receiver_context *ctx = malloc(sizeof(*ctx)); + + if (!ctx) + barf("malloc()"); + + + /* Create the pipe between client and server */ + fdpair(fds); + + ctx->num_packets = num_fds * loops; + ctx->in_fds[0] = fds[0]; + ctx->in_fds[1] = fds[1]; + ctx->ready_out = ready_out; + ctx->wakefd = wakefd; + + pth[i] = create_worker(ctx, (void *)receiver); + + snd_ctx->out_fds[i] = fds[1]; + if (!thread_mode) + close(fds[0]); + } + + /* Now we have all the fds, fork the senders */ + for (i = 0; i < num_fds; i++) { + snd_ctx->ready_out = ready_out; + snd_ctx->wakefd = wakefd; + snd_ctx->num_fds = num_fds; + + pth[num_fds+i] = create_worker(snd_ctx, (void *)sender); + } + + /* Close the fds we have left */ + if (!thread_mode) + for (i = 0; i < num_fds; i++) + close(snd_ctx->out_fds[i]); + + /* Return number of children to reap */ + return num_fds * 2; +} + +static const struct option options[] = { + OPT_BOOLEAN('p', "pipe", &use_pipes, + "Use pipe() instead of socketpair()"), + OPT_BOOLEAN('t', "thread", &thread_mode, + "Be multi thread instead of multi process"), + OPT_INTEGER('g', "group", &num_groups, + "Specify number of groups"), + OPT_INTEGER('l', "loop", &loops, + "Specify number of loops"), + OPT_BOOLEAN('s', "simple-output", &simple, + "Do simple output (this maybe useful for" + "processing by scripts or graph tools like gnuplot)"), + OPT_END() +}; + +static const char * const bench_sched_message_usage[] = { + "perf bench sched messaging ", + NULL +}; + +int bench_sched_messaging(int argc, const char **argv, + const char *prefix __used) +{ + unsigned int i, total_children; + struct timeval start, stop, diff; + unsigned int num_fds = 20; + int readyfds[2], wakefds[2]; + char dummy; + pthread_t *pth_tab; + + argc = parse_options(argc, argv, options, + bench_sched_message_usage, 0); + + pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t)); + if (!pth_tab) + barf("main:malloc()"); + + fdpair(readyfds); + fdpair(wakefds); + + total_children = 0; + for (i = 0; i < num_groups; i++) + total_children += group(pth_tab+total_children, num_fds, + readyfds[1], wakefds[0]); + + /* Wait for everyone to be ready */ + for (i = 0; i < total_children; i++) + if (read(readyfds[0], &dummy, 1) != 1) + barf("Reading for readyfds"); + + gettimeofday(&start, NULL); + + /* Kick them off */ + if (write(wakefds[1], &dummy, 1) != 1) + barf("Writing to start them"); + + /* Reap them all */ + for (i = 0; i < total_children; i++) + reap_worker(pth_tab[i]); + + gettimeofday(&stop, NULL); + + timersub(&stop, &start, &diff); + + if (simple) + printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000); + else { + printf("(%d sender and receiver %s per group)\n", + num_fds, thread_mode ? "threads" : "processes"); + printf("(%d groups == %d %s run)\n\n", + num_groups, num_groups * 2 * num_fds, + thread_mode ? "threads" : "processes"); + printf("\tTotal time:%lu.%03lu sec\n", + diff.tv_sec, diff.tv_usec/1000); + } + + return 0; +} -- cgit v0.10.2 From c7d9300f367f480aee4663a0e3695c5b48859a1a Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Thu, 5 Nov 2009 09:31:33 +0900 Subject: perf bench: Add sched-pipe.c: Benchmark for pipe() system call This patch adds bench/sched-pipe.c. bench/sched-pipe.c is a benchmark program to measure performance of pipe() system call. This benchmark is based on pipe-test-1m.c by Ingo Molnar: http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c Example of use: % perf bench sched pipe (executing 1000000 pipe operations between two tasks) Total time:4.499 sec 4.499179 usecs/op 222262 ops/sec % perf bench sched pipe -s -l 1000 0.015 Signed-off-by: Hitoshi Mitake Cc: Rusty Russell Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: fweisbec@gmail.com Cc: Jiri Kosina LKML-Reference: <1257381097-4743-4-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c new file mode 100644 index 0000000..3214ed2 --- /dev/null +++ b/tools/perf/bench/sched-pipe.c @@ -0,0 +1,113 @@ +/* + * + * builtin-bench-pipe.c + * + * pipe: Benchmark for pipe() + * + * Based on pipe-test-1m.c by Ingo Molnar + * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c + * Ported to perf by Hitoshi Mitake + * + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../builtin.h" +#include "bench.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LOOPS_DEFAULT 1000000 +static int loops = LOOPS_DEFAULT; +static int simple = 0; + +static const struct option options[] = { + OPT_INTEGER('l', "loop", &loops, + "Specify number of loops"), + OPT_BOOLEAN('s', "simple-output", &simple, + "Do simple output (this maybe useful for" + "processing by scripts or graph tools like gnuplot)"), + OPT_END() +}; + +static const char * const bench_sched_pipe_usage[] = { + "perf bench sched pipe ", + NULL +}; + +int bench_sched_pipe(int argc, const char **argv, + const char *prefix __used) +{ + int pipe_1[2], pipe_2[2]; + int m = 0, i; + struct timeval start, stop, diff; + unsigned long long result_usec = 0; + + /* + * why does "ret" exist? + * discarding returned value of read(), write() + * causes error in building environment for perf + */ + int ret; + pid_t pid; + + argc = parse_options(argc, argv, options, + bench_sched_pipe_usage, 0); + + assert(!pipe(pipe_1)); + assert(!pipe(pipe_2)); + + pid = fork(); + assert(pid >= 0); + + gettimeofday(&start, NULL); + + if (!pid) { + for (i = 0; i < loops; i++) { + ret = read(pipe_1[0], &m, sizeof(int)); + ret = write(pipe_2[1], &m, sizeof(int)); + } + } else { + for (i = 0; i < loops; i++) { + ret = write(pipe_1[1], &m, sizeof(int)); + ret = read(pipe_2[0], &m, sizeof(int)); + } + } + + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + + if (pid) + return 0; + + if (simple) + printf("%lu.%03lu\n", + diff.tv_sec, diff.tv_usec / 1000); + else { + printf("(executing %d pipe operations between two tasks)\n\n", + loops); + + result_usec = diff.tv_sec * 1000000; + result_usec += diff.tv_usec; + + printf("\tTotal time:%lu.%03lu sec\n", + diff.tv_sec, diff.tv_usec / 1000); + printf("\t\t%lf usecs/op\n", + (double)result_usec / (double)loops); + printf("\t\t%d ops/sec\n", + (int)((double)loops / + ((double)result_usec / (double)1000000))); + } + + return 0; +} -- cgit v0.10.2 From 629cc356653719c206a05f4dee5c5e242edb6546 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Thu, 5 Nov 2009 09:31:34 +0900 Subject: perf bench: Add builtin-bench.c: General framework for benchmark suites This patch adds builtin-bench.c builtin-bench.c is a general framework for benchmark suites. Signed-off-by: Hitoshi Mitake Cc: Rusty Russell Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: fweisbec@gmail.com Cc: Jiri Kosina LKML-Reference: <1257381097-4743-5-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c new file mode 100644 index 0000000..31f4164 --- /dev/null +++ b/tools/perf/builtin-bench.c @@ -0,0 +1,128 @@ +/* + * + * builtin-bench.c + * + * General benchmarking subsystem provided by perf + * + * Copyright (C) 2009, Hitoshi Mitake + * + */ + +/* + * + * Available subsystem list: + * sched ... scheduler and IPC mechanism + * + */ + +#include "perf.h" +#include "util/util.h" +#include "util/parse-options.h" +#include "builtin.h" +#include "bench/bench.h" + +#include +#include +#include + +struct bench_suite { + const char *name; + const char *summary; + int (*fn)(int, const char **, const char *); +}; + +static struct bench_suite sched_suites[] = { + { "messaging", + "Benchmark for scheduler and IPC mechanisms", + bench_sched_messaging }, + { "pipe", + "Flood of communication over pipe() between two processes", + bench_sched_pipe }, + { NULL, + NULL, + NULL } +}; + +struct bench_subsys { + const char *name; + const char *summary; + struct bench_suite *suites; +}; + +static struct bench_subsys subsystems[] = { + { "sched", + "scheduler and IPC mechanism", + sched_suites }, + { NULL, + NULL, + NULL } +}; + +static void dump_suites(int subsys_index) +{ + int i; + + printf("List of available suites for %s...\n\n", + subsystems[subsys_index].name); + + for (i = 0; subsystems[subsys_index].suites[i].name; i++) + printf("\t%s: %s\n", + subsystems[subsys_index].suites[i].name, + subsystems[subsys_index].suites[i].summary); + + printf("\n"); + return; +} + +int cmd_bench(int argc, const char **argv, const char *prefix __used) +{ + int i, j, status = 0; + + if (argc < 2) { + /* No subsystem specified. */ + printf("Usage: perf bench []\n\n"); + printf("List of available subsystems...\n\n"); + + for (i = 0; subsystems[i].name; i++) + printf("\t%s: %s\n", + subsystems[i].name, subsystems[i].summary); + printf("\n"); + + goto end; + } + + for (i = 0; subsystems[i].name; i++) { + if (strcmp(subsystems[i].name, argv[1])) + continue; + + if (argc < 3) { + /* No suite specified. */ + dump_suites(i); + goto end; + } + + for (j = 0; subsystems[i].suites[j].name; j++) { + if (strcmp(subsystems[i].suites[j].name, argv[2])) + continue; + + status = subsystems[i].suites[j].fn(argc - 2, + argv + 2, prefix); + goto end; + } + + if (!strcmp(argv[2], "-h") || !strcmp(argv[2], "--help")) { + dump_suites(i); + goto end; + } + + printf("Unknown suite:%s for %s\n", argv[2], argv[1]); + status = 1; + goto end; + } + + printf("Unknown subsystem:%s\n", argv[1]); + status = 1; + +end: + return status; +} -- cgit v0.10.2 From 11bd341c043348ecb7462d3bd8e1ad6d00f6892a Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Thu, 5 Nov 2009 09:31:35 +0900 Subject: perf bench: Modify builtin.h for new prototype This patch modifies builtin.h to add prototype of cmd_bench(). Signed-off-by: Hitoshi Mitake Cc: Rusty Russell Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: fweisbec@gmail.com Cc: Jiri Kosina LKML-Reference: <1257381097-4743-6-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index e11d8d2..f0cd5b1 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -15,6 +15,7 @@ extern int read_line_with_nul(char *buf, int size, FILE *file); extern int check_pager_config(const char *cmd); extern int cmd_annotate(int argc, const char **argv, const char *prefix); +extern int cmd_bench(int argc, const char **argv, const char *prefix); extern int cmd_help(int argc, const char **argv, const char *prefix); extern int cmd_sched(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix); -- cgit v0.10.2 From dcba8848d3bc83ec9ee0858b9ae6e4f1c1fa7fa3 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Thu, 5 Nov 2009 09:31:36 +0900 Subject: perf bench: Add new subcommand 'bench' to perf.c This patch modifies perf.c for invoking 'bench' subcommand. Signed-off-by: Hitoshi Mitake Cc: Rusty Russell Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: fweisbec@gmail.com Cc: Jiri Kosina LKML-Reference: <1257381097-4743-7-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 624e62d..f90ca5e 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -288,6 +288,7 @@ static void handle_internal_command(int argc, const char **argv) { "list", cmd_list, 0 }, { "record", cmd_record, 0 }, { "report", cmd_report, 0 }, + { "bench", cmd_bench, 0 }, { "stat", cmd_stat, 0 }, { "timechart", cmd_timechart, 0 }, { "top", cmd_top, 0 }, -- cgit v0.10.2 From bfde82ef51e3ea6ab8634d0fdbf5adcdd1b429cb Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Thu, 5 Nov 2009 09:31:37 +0900 Subject: perf bench: Add subcommand 'bench' to the Makefile This patch modifies Makefile for new files related to 'bench' subcommand. The new code is active from this point on. Signed-off-by: Hitoshi Mitake Cc: Rusty Russell Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: fweisbec@gmail.com Cc: Jiri Kosina LKML-Reference: <1257381097-4743-8-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 542b29e..0a25428 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -416,6 +416,13 @@ LIB_OBJS += util/hist.o LIB_OBJS += util/data_map.o BUILTIN_OBJS += builtin-annotate.o + +BUILTIN_OBJS += builtin-bench.o + +# Benchmark modules +BUILTIN_OBJS += bench/sched-messaging.o +BUILTIN_OBJS += bench/sched-pipe.o + BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-sched.o BUILTIN_OBJS += builtin-list.o -- cgit v0.10.2 From 444a2a3bcd6d5bed5c823136f68fcc93c0fe283f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 6 Nov 2009 04:13:05 +0100 Subject: tracing, perf_events: Protect the buffer from recursion in perf While tracing using events with perf, if one enables the lockdep:lock_acquire event, it will infect every other perf trace events. Basically, you can enable whatever set of trace events through perf but if this event is part of the set, the only result we can get is a long list of lock_acquire events of rcu read lock, and only that. This is because of a recursion inside perf. 1) When a trace event is triggered, it will fill a per cpu buffer and submit it to perf. 2) Perf will commit this event but will also protect some data using rcu_read_lock 3) A recursion appears: rcu_read_lock triggers a lock_acquire event that will fill the per cpu event and then submit the buffer to perf. 4) Perf detects a recursion and ignores it 5) Perf continues its work on the previous event, but its buffer has been overwritten by the lock_acquire event, it has then been turned into a lock_acquire event of rcu read lock Such scenario also happens with lock_release with rcu_read_unlock(). We could turn the rcu_read_lock() into __rcu_read_lock() to drop the lock debugging from perf fast path, but that would make us lose the rcu debugging and that doesn't prevent from other possible kind of recursion from perf in the future. This patch adds a recursion protection based on a counter on the perf trace per cpu buffers to solve the problem. -v2: Fixed lost whitespace, added reviewed-by tag Signed-off-by: Frederic Weisbecker Reviewed-by: Masami Hiramatsu Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan Cc: Jason Baron LKML-Reference: <1257477185-7838-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index f7b47c3..43360c1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,8 +137,13 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *trace_profile_buf; -extern char *trace_profile_buf_nmi; +struct perf_trace_buf { + char buf[FTRACE_MAX_PROFILE_SIZE]; + int recursion; +}; + +extern struct perf_trace_buf *perf_trace_buf; +extern struct perf_trace_buf *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a7f9460..4945d1c9 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -649,6 +649,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * struct ftrace_event_call *event_call = &event_; * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; + * struct perf_trace_buf *trace_buf; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; * struct trace_entry *ent; @@ -673,14 +674,25 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * __cpu = smp_processor_id(); * * if (in_nmi()) - * raw_data = rcu_dereference(trace_profile_buf_nmi); + * trace_buf = rcu_dereference(perf_trace_buf_nmi); * else - * raw_data = rcu_dereference(trace_profile_buf); + * trace_buf = rcu_dereference(perf_trace_buf); * - * if (!raw_data) + * if (!trace_buf) * goto end; * - * raw_data = per_cpu_ptr(raw_data, __cpu); + * trace_buf = per_cpu_ptr(trace_buf, __cpu); + * + * // Avoid recursion from perf that could mess up the buffer + * if (trace_buf->recursion++) + * goto end_recursion; + * + * raw_data = trace_buf->buf; + * + * // Make recursion update visible before entering perf_tp_event + * // so that we protect from perf recursions. + * + * barrier(); * * //zero dead bytes from alignment to avoid stack leak to userspace: * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; @@ -713,8 +725,9 @@ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tp_event(int, u64, u64, void *, int); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ + struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ @@ -739,14 +752,20 @@ static void ftrace_profile_##call(proto) \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ - raw_data = rcu_dereference(trace_profile_buf_nmi); \ + trace_buf = rcu_dereference(perf_trace_buf_nmi); \ else \ - raw_data = rcu_dereference(trace_profile_buf); \ + trace_buf = rcu_dereference(perf_trace_buf); \ \ - if (!raw_data) \ + if (!trace_buf) \ goto end; \ \ - raw_data = per_cpu_ptr(raw_data, __cpu); \ + trace_buf = per_cpu_ptr(trace_buf, __cpu); \ + if (trace_buf->recursion++) \ + goto end_recursion; \ + \ + barrier(); \ + \ + raw_data = trace_buf->buf; \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -761,6 +780,8 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ +end_recursion: \ + trace_buf->recursion--; \ end: \ local_irq_restore(irq_flags); \ \ diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index c9f687a..e0d351b 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -8,41 +8,36 @@ #include #include "trace.h" -/* - * We can't use a size but a type in alloc_percpu() - * So let's create a dummy type that matches the desired size - */ -typedef struct {char buf[FTRACE_MAX_PROFILE_SIZE];} profile_buf_t; -char *trace_profile_buf; -EXPORT_SYMBOL_GPL(trace_profile_buf); +struct perf_trace_buf *perf_trace_buf; +EXPORT_SYMBOL_GPL(perf_trace_buf); -char *trace_profile_buf_nmi; -EXPORT_SYMBOL_GPL(trace_profile_buf_nmi); +struct perf_trace_buf *perf_trace_buf_nmi; +EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - char *buf; + struct perf_trace_buf *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf = (char *)alloc_percpu(profile_buf_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf; - rcu_assign_pointer(trace_profile_buf, buf); + rcu_assign_pointer(perf_trace_buf, buf); - buf = (char *)alloc_percpu(profile_buf_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, buf); + rcu_assign_pointer(perf_trace_buf_nmi, buf); } ret = event->profile_enable(event); @@ -53,10 +48,10 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event) fail_buf_nmi: if (!total_profile_count) { - free_percpu(trace_profile_buf_nmi); - free_percpu(trace_profile_buf); - trace_profile_buf_nmi = NULL; - trace_profile_buf = NULL; + free_percpu(perf_trace_buf_nmi); + free_percpu(perf_trace_buf); + perf_trace_buf_nmi = NULL; + perf_trace_buf = NULL; } fail_buf: atomic_dec(&event->profile_count); @@ -84,7 +79,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - char *buf, *nmi_buf; + struct perf_trace_buf *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; @@ -92,11 +87,11 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event) event->profile_disable(event); if (!--total_profile_count) { - buf = trace_profile_buf; - rcu_assign_pointer(trace_profile_buf, NULL); + buf = perf_trace_buf; + rcu_assign_pointer(perf_trace_buf, NULL); - nmi_buf = trace_profile_buf_nmi; - rcu_assign_pointer(trace_profile_buf_nmi, NULL); + nmi_buf = perf_trace_buf_nmi; + rcu_assign_pointer(perf_trace_buf_nmi, NULL); /* * Ensure every events in profiling have finished before diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index cf17a66..3696476 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1208,6 +1208,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; @@ -1229,14 +1230,26 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, __cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kprobe_trace_entry *)raw_data; @@ -1249,8 +1262,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); + +end_recursion: + trace_buf->recursion--; end: local_irq_restore(irq_flags); + return 0; } @@ -1261,6 +1278,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; @@ -1282,14 +1300,26 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, __cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; + /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; entry = (struct kretprobe_trace_entry *)raw_data; @@ -1303,8 +1333,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, for (i = 0; i < tp->nr_args; i++) entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); + +end_recursion: + trace_buf->recursion--; end: local_irq_restore(irq_flags); + return 0; } diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 58b8e53..51213b0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -477,6 +477,7 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; + struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; char *raw_data; @@ -507,14 +508,25 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -527,6 +539,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); +end_recursion: + trace_buf->recursion--; end: local_irq_restore(flags); } @@ -574,6 +588,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; + struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; char *raw_data; @@ -605,14 +620,25 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) cpu = smp_processor_id(); if (in_nmi()) - raw_data = rcu_dereference(trace_profile_buf_nmi); + trace_buf = rcu_dereference(perf_trace_buf_nmi); else - raw_data = rcu_dereference(trace_profile_buf); + trace_buf = rcu_dereference(perf_trace_buf); - if (!raw_data) + if (!trace_buf) goto end; - raw_data = per_cpu_ptr(raw_data, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. + */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -626,6 +652,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); +end_recursion: + trace_buf->recursion--; end: local_irq_restore(flags); } -- cgit v0.10.2 From 8d06367fa79c053a4a56a2ce0bb9e840f5da1236 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 4 Nov 2009 18:50:43 -0200 Subject: perf symbols: Use the buildids if present With this change 'perf record' will intercept PERF_RECORD_MMAP calls, creating a linked list of DSOs, then when the session finishes, it will traverse this list and read the buildids, stashing them at the end of the file and will set up a new feature bit in the header bitmask. 'perf report' will then notice this feature and populate the 'dsos' list and set the build ids. When reading the symtabs it will refuse to load from a file that doesn't have the same build id. This improves the reliability of the profiler output, as symbols and profiling data is more guaranteed to match. Example: [root@doppio ~]# perf report | head /home/acme/bin/perf with build id b1ea544ac3746e7538972548a09aadecc5753868 not found, continuing without symbols # Samples: 2621434559 # # Overhead Command Shared Object Symbol # ........ ............... ............................. ...... # 7.91% init [kernel] [k] read_hpet 7.64% init [kernel] [k] mwait_idle_with_hints 7.60% swapper [kernel] [k] read_hpet 7.60% swapper [kernel] [k] mwait_idle_with_hints 3.65% init [kernel] [k] 0xffffffffa02339d9 [root@doppio ~]# In this case the 'perf' binary was an older one, vanished, so its symbols probably wouldn't match or would cause subtly different (and misleading) output. Next patches will support the kernel as well, reading the build id notes for it and the modules from /sys. Another patch should also introduce a new plumbing command: 'perf list-buildids' that will then be used in porcelain that is distro specific to fetch -debuginfo packages where such buildids are present. This will in turn allow for one to run 'perf record' in one machine and 'perf report' in another. Future work on having the buildid sent directly from the kernel in the PERF_RECORD_MMAP event is needed to close races, as the DSO can be changed during a 'perf record' session, but this patch at least helps with non-corner cases and current/older kernels. Signed-off-by: Arnaldo Carvalho de Melo Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: Jim Keniston Cc: K. Prasad Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Roland McGrath Cc: Srikar Dronamraju Cc: Steven Rostedt LKML-Reference: <1257367843-26224-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4a73d89..ab33381 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -17,6 +17,7 @@ #include "util/header.h" #include "util/event.h" #include "util/debug.h" +#include "util/symbol.h" #include #include @@ -109,9 +110,21 @@ static void write_output(void *buf, size_t size) } } +static void write_event(event_t *buf, size_t size) +{ + /* + * Add it to the list of DSOs, so that when we finish this + * record session we can pick the available build-ids. + */ + if (buf->header.type == PERF_RECORD_MMAP) + dsos__findnew(buf->mmap.filename); + + write_output(buf, size); +} + static int process_synthesized_event(event_t *event) { - write_output(event, event->header.size); + write_event(event, event->header.size); return 0; } @@ -163,14 +176,14 @@ static void mmap_read(struct mmap_data *md) size = md->mask + 1 - (old & md->mask); old += size; - write_output(buf, size); + write_event(buf, size); } buf = &data[old & md->mask]; size = head - old; old += size; - write_output(buf, size); + write_event(buf, size); md->prev = old; mmap_write_tail(md, old); @@ -365,10 +378,38 @@ static void open_counters(int cpu, pid_t pid) nr_cpu++; } +static bool write_buildid_table(void) +{ + struct dso *pos; + bool have_buildid = false; + + list_for_each_entry(pos, &dsos, node) { + struct build_id_event b; + size_t len; + + if (filename__read_build_id(pos->long_name, + &b.build_id, + sizeof(b.build_id)) < 0) + continue; + have_buildid = true; + memset(&b.header, 0, sizeof(b.header)); + len = strlen(pos->long_name) + 1; + len = ALIGN(len, 64); + b.header.size = sizeof(b) + len; + write_output(&b, sizeof(b)); + write_output(pos->long_name, len); + } + + return have_buildid; +} + static void atexit_header(void) { header->data_size += bytes_written; + if (write_buildid_table()) + perf_header__set_feat(header, HEADER_BUILD_ID); + perf_header__write(header, output); } @@ -572,6 +613,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) { int counter; + symbol__init(0); + argc = parse_options(argc, argv, options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && target_pid == -1 && !system_wide) diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index c458db9..00a9c11 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -70,6 +70,39 @@ process_event(event_t *event, unsigned long offset, unsigned long head) } } +static int perf_header__read_build_ids(const struct perf_header *self, + int input, off_t file_size) +{ + off_t offset = self->data_offset + self->data_size; + struct build_id_event bev; + char filename[PATH_MAX]; + int err = -1; + + if (lseek(input, offset, SEEK_SET) < 0) + return -1; + + while (offset < file_size) { + struct dso *dso; + ssize_t len; + + if (read(input, &bev, sizeof(bev)) != sizeof(bev)) + goto out; + + len = bev.header.size - sizeof(bev); + if (read(input, filename, len) != len) + goto out; + + dso = dsos__findnew(filename); + if (dso != NULL) + dso__set_build_id(dso, &bev.build_id); + + offset += bev.header.size; + } + err = 0; +out: + return err; +} + int mmap_dispatch_perf_file(struct perf_header **pheader, const char *input_name, int force, @@ -130,6 +163,10 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, if (curr_handler->sample_type_check(sample_type) < 0) exit(-1); + if (perf_header__has_feat(header, HEADER_BUILD_ID) && + perf_header__read_build_ids(header, input, input_stat.st_size)) + pr_debug("failed to read buildids, continuing...\n"); + if (load_kernel(NULL) < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 0a443be..34c6fcb 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -61,6 +61,13 @@ struct sample_event{ u64 array[]; }; +#define BUILD_ID_SIZE 20 + +struct build_id_event { + struct perf_event_header header; + u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))]; + char filename[]; +}; typedef union event_union { struct perf_event_header header; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7d26659..050f543 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -149,6 +149,16 @@ void perf_header__feat_trace_info(struct perf_header *header) set_bit(HEADER_TRACE_INFO, header->adds_features); } +void perf_header__set_feat(struct perf_header *self, int feat) +{ + set_bit(feat, self->adds_features); +} + +bool perf_header__has_feat(const struct perf_header *self, int feat) +{ + return test_bit(feat, self->adds_features); +} + static void do_write(int fd, void *buf, size_t size) { while (size) { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2ea9dfb..2f233c5 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -3,6 +3,7 @@ #include "../../../include/linux/perf_event.h" #include +#include #include "types.h" #include @@ -15,6 +16,7 @@ struct perf_header_attr { }; #define HEADER_TRACE_INFO 1 +#define HEADER_BUILD_ID 2 #define HEADER_FEAT_BITS 256 @@ -48,6 +50,8 @@ u64 perf_header__sample_type(struct perf_header *header); struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header); void perf_header__feat_trace_info(struct perf_header *header); +void perf_header__set_feat(struct perf_header *self, int feat); +bool perf_header__has_feat(const struct perf_header *self, int feat); struct perf_header *perf_header__new(void); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 33f8684..94ca950 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -84,8 +84,18 @@ map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter) int nr = dso__load(self->dso, self, filter); if (nr < 0) { - pr_warning("Failed to open %s, continuing without symbols\n", - self->dso->long_name); + if (self->dso->has_build_id) { + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(self->dso->build_id, + sizeof(self->dso->build_id), + sbuild_id); + pr_warning("%s with build id %s not found", + self->dso->long_name, sbuild_id); + } else + pr_warning("Failed to open %s", + self->dso->long_name); + pr_warning(", continuing without symbols\n"); return NULL; } else if (nr == 0) { const char *name = self->dso->long_name; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e7c7cdb..a2e95ce 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -121,7 +121,8 @@ struct dso *dso__new(const char *name) self->find_symbol = dso__find_symbol; self->slen_calculated = 0; self->origin = DSO__ORIG_NOT_FOUND; - self->loaded = false; + self->loaded = 0; + self->has_build_id = 0; } return self; @@ -148,6 +149,12 @@ void dso__delete(struct dso *self) free(self); } +void dso__set_build_id(struct dso *self, void *build_id) +{ + memcpy(self->build_id, build_id, sizeof(self->build_id)); + self->has_build_id = 1; +} + static void dso__insert_symbol(struct dso *self, struct symbol *sym) { struct rb_node **p = &self->syms.rb_node; @@ -190,11 +197,30 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip) return NULL; } -size_t dso__fprintf(struct dso *self, FILE *fp) +int build_id__sprintf(u8 *self, int len, char *bf) { - size_t ret = fprintf(fp, "dso: %s\n", self->short_name); + char *bid = bf; + u8 *raw = self; + int i; + for (i = 0; i < len; ++i) { + sprintf(bid, "%02x", *raw); + ++raw; + bid += 2; + } + + return raw - self; +} + +size_t dso__fprintf(struct dso *self, FILE *fp) +{ + char sbuild_id[BUILD_ID_SIZE * 2 + 1]; struct rb_node *nd; + size_t ret; + + build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id); + ret = fprintf(fp, "dso: %s (%s)\n", self->short_name, sbuild_id); + for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); ret += symbol__fprintf(pos, fp); @@ -825,8 +851,6 @@ out_close: return err; } -#define BUILD_ID_SIZE 20 - int filename__read_build_id(const char *filename, void *bf, size_t size) { int fd, err = -1; @@ -845,7 +869,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { - pr_err("%s: cannot read %s ELF file.\n", __func__, filename); + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); goto out_close; } @@ -874,9 +898,9 @@ out: static char *dso__read_build_id(struct dso *self) { - int i, len; - char *build_id = NULL, *bid; - unsigned char rawbf[BUILD_ID_SIZE], *raw; + int len; + char *build_id = NULL; + unsigned char rawbf[BUILD_ID_SIZE]; len = filename__read_build_id(self->long_name, rawbf, sizeof(rawbf)); if (len < 0) @@ -885,15 +909,8 @@ static char *dso__read_build_id(struct dso *self) build_id = malloc(len * 2 + 1); if (build_id == NULL) goto out; - bid = build_id; - raw = rawbf; - for (i = 0; i < len; ++i) { - sprintf(bid, "%02x", *raw); - ++raw; - bid += 2; - } - pr_debug2("%s(%s): %s\n", __func__, self->long_name, build_id); + build_id__sprintf(rawbf, len, build_id); out: return build_id; } @@ -922,7 +939,7 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) int ret = -1; int fd; - self->loaded = true; + self->loaded = 1; if (!name) return -1; @@ -940,6 +957,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) more: do { + int berr = 0; + self->origin++; switch (self->origin) { case DSO__ORIG_FEDORA: @@ -956,8 +975,7 @@ more: snprintf(name, size, "/usr/lib/debug/.build-id/%.2s/%s.debug", build_id, build_id + 2); - free(build_id); - break; + goto compare_build_id; } self->origin++; /* Fall thru */ @@ -969,6 +987,22 @@ more: goto out; } + if (self->has_build_id) { + bool match; + build_id = malloc(BUILD_ID_SIZE); + if (build_id == NULL) + goto more; + berr = filename__read_build_id(name, build_id, + BUILD_ID_SIZE); +compare_build_id: + match = berr > 0 && memcmp(build_id, self->build_id, + sizeof(self->build_id)) == 0; + free(build_id); + build_id = NULL; + if (!match) + goto more; + } + fd = open(name, O_RDONLY); } while (fd < 0); @@ -1034,7 +1068,7 @@ static int dso__load_module_sym(struct dso *self, struct map *map, { int err = 0, fd = open(self->long_name, O_RDONLY); - self->loaded = true; + self->loaded = 1; if (fd < 0) { pr_err("%s: cannot open %s\n", __func__, self->long_name); @@ -1225,7 +1259,7 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, { int err, fd = open(vmlinux, O_RDONLY); - self->loaded = true; + self->loaded = 1; if (fd < 0) return -1; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index e0d4a58..f8c1899 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -60,10 +60,12 @@ struct dso { struct list_head node; struct rb_root syms; struct symbol *(*find_symbol)(struct dso *, u64 ip); - unsigned char adjust_symbols; - unsigned char slen_calculated; - bool loaded; + u8 adjust_symbols:1; + u8 slen_calculated:1; + u8 loaded:1; + u8 has_build_id:1; unsigned char origin; + u8 build_id[BUILD_ID_SIZE]; const char *short_name; char *long_name; char name[0]; @@ -81,8 +83,10 @@ void dsos__fprintf(FILE *fp); size_t dso__fprintf(struct dso *self, FILE *fp); char dso__symtab_origin(const struct dso *self); +void dso__set_build_id(struct dso *self, void *build_id); int filename__read_build_id(const char *filename, void *bf, size_t size); +int build_id__sprintf(u8 *self, int len, char *bf); int load_kernel(symbol_filter_t filter); -- cgit v0.10.2 From c12a229bc5971534537a7d0e49e44f9f1f5d0336 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 5 Nov 2009 11:03:59 -0500 Subject: x86: Remove unused thread_return label from switch_to() Remove unused thread_return label from switch_to() macro on x86-64. Since this symbol cuts into schedule(), backtrace at the latter half of schedule() was always shown as thread_return(). Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20091105160359.5181.26225.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index f08f973..1a953e2 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -128,8 +128,6 @@ do { \ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ "call __switch_to\n\t" \ - ".globl thread_return\n" \ - "thread_return:\n\t" \ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ -- cgit v0.10.2 From 0d0fbbddcc27c062815732b38c44b544e656c799 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 5 Nov 2009 22:45:41 +1030 Subject: x86, msr, cpumask: Use struct cpumask rather than the deprecated cpumask_t This makes the declarations match the definitions, which already use 'struct cpumask'. Signed-off-by: Rusty Russell Acked-by: Borislav Petkov LKML-Reference: <200911052245.41803.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 9a00219..5bef931 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -264,12 +264,12 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) wrmsr(msr_no, l, h); return 0; } -static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void rdmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); } -static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, +static inline void wrmsr_on_cpus(const struct cpumask *m, u32 msr_no, struct msr *msrs) { wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); -- cgit v0.10.2 From 338bac527ed0e35b4cb50390972f15d3cbce92ca Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 27 Oct 2009 16:34:44 +0900 Subject: x86: Use x86_platform for iommu_shutdown This patch cleans up pci_iommu_shutdown() a bit to use x86_platform (similar to how IA64 initializes an IOMMU driver). This adds iommu_shutdown() to x86_platform to avoid calling every IOMMUs' shutdown functions in pci_iommu_shutdown() in order. The IOMMU shutdown functions are platform specific (we don't have multiple different IOMMU hardware) so the current way is pointless. An IOMMU driver sets x86_platform.iommu_shutdown to the shutdown function if necessary. Signed-off-by: FUJITA Tomonori Cc: joerg.roedel@amd.com LKML-Reference: <20091027163358F.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 4b18089..3604669 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -30,12 +30,10 @@ extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_shutdown(void); extern void amd_iommu_apply_erratum_63(u16 devid); #else static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } -static inline void amd_iommu_shutdown(void) { } #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 6cfdafa..4fdd5b3 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -36,7 +36,6 @@ extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); extern void gart_iommu_init(void); -extern void gart_iommu_shutdown(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -51,9 +50,6 @@ static inline void early_gart_iommu_check(void) static inline void gart_iommu_init(void) { } -static inline void gart_iommu_shutdown(void) -{ -} static inline void gart_parse_options(char *options) { } diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index fd6d21b..878b307 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern void pci_iommu_shutdown(void); +static inline void iommu_shutdown_noop(void) {} extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2c756fd..66008ed 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -121,6 +121,7 @@ struct x86_platform_ops { unsigned long (*calibrate_tsc)(void); unsigned long (*get_wallclock)(void); int (*set_wallclock)(unsigned long nowtime); + void (*iommu_shutdown)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c20001e..6acd43e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1297,6 +1297,7 @@ int __init amd_iommu_init(void) else printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); + x86_platform.iommu_shutdown = disable_iommus; out: return ret; @@ -1323,11 +1324,6 @@ free: goto out; } -void amd_iommu_shutdown(void) -{ - disable_iommus(); -} - /**************************************************************************** * * Early detect code. This code runs at IOMMU detection time in the DMA diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 5e409dc..a4849c1 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -27,8 +27,7 @@ #include #include #include -#include - +#include #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) @@ -106,7 +105,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif crash_save_cpu(regs, safe_smp_processor_id()); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index b2a71dc..ce2fb91 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -303,13 +303,6 @@ static int __init pci_iommu_init(void) no_iommu_init(); return 0; } - -void pci_iommu_shutdown(void) -{ - gart_iommu_shutdown(); - - amd_iommu_shutdown(); -} /* Must execute after PCI subsystem */ rootfs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a7f1b64..a9bcdf7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -39,6 +39,7 @@ #include #include #include +#include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -688,12 +689,12 @@ static struct dma_map_ops gart_dma_ops = { .free_coherent = gart_free_coherent, }; -void gart_iommu_shutdown(void) +static void gart_iommu_shutdown(void) { struct pci_dev *dev; int i; - if (no_agp && (dma_ops != &gart_dma_ops)) + if (no_agp) return; for (i = 0; i < num_k8_northbridges; i++) { @@ -838,6 +839,7 @@ void __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; + x86_platform.iommu_shutdown = gart_iommu_shutdown; } void __init gart_parse_options(char *p) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f930787..2b97fc5 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -23,7 +23,7 @@ # include # include #else -# include +# include #endif /* @@ -622,7 +622,7 @@ void native_machine_shutdown(void) #endif #ifdef CONFIG_X86_64 - pci_iommu_shutdown(); + x86_platform.iommu_shutdown(); #endif } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4449a4a..bc9b230 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -14,6 +14,7 @@ #include #include #include +#include void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } @@ -72,4 +73,5 @@ struct x86_platform_ops x86_platform = { .calibrate_tsc = native_calibrate_tsc, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, + .iommu_shutdown = iommu_shutdown_noop, }; -- cgit v0.10.2 From c82a43d40b93200a10a9fec0a489791e65e135ca Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 26 Oct 2009 23:28:11 +0300 Subject: irq: Do not attempt to create subdirectories if /proc/irq/ failed If a parent directory (ie /proc/irq/) could not be created we should not attempt to create subdirectories. Otherwise it would lead that "smp_affinity" and "spurious" entries are may be registered under /proc root instead of a proper place. Signed-off-by: Cyrill Gorcunov Cc: Rusty Russell Cc: Yinghai Lu LKML-Reference: <20091026202811.GD5321@lenovo> Signed-off-by: Ingo Molnar diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 692363d..dfef5b9 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -214,6 +214,8 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) /* create /proc/irq/1234 */ desc->dir = proc_mkdir(name, root_irq_dir); + if (!desc->dir) + return; #ifdef CONFIG_SMP /* create /proc/irq//smp_affinity */ -- cgit v0.10.2 From d8c80ce091f6ead6710bc71b58f2c32e5bf855e4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 27 Oct 2009 15:45:23 +0800 Subject: sched, no_hz: Remove unused rq->last_tick_seen field In 15934a37324f32e0fda633dc7984a671ea81cd75, field last_tick_seen is added to struct rq. But it is unused now. Signed-off-by: Lai Jiangshan Cc: Guillaume Chazarain LKML-Reference: <4AE6A513.6010100@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index f849212..23e3535 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -534,7 +534,6 @@ struct rq { #define CPU_LOAD_IDX_MAX 5 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; #ifdef CONFIG_NO_HZ - unsigned long last_tick_seen; unsigned char in_nohz_recently; #endif /* capture load from *all* tasks on this cpu: */ -- cgit v0.10.2 From 2ae8bb75db1f3de422eb5898f2a063c46c36dba8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 26 Oct 2009 15:41:46 +0100 Subject: x86: Fix iommu=nodac parameter handling iommu=nodac should forbid dac instead of enabling it. Fix it. Signed-off-by: Tejun Heo Acked-by: FUJITA Tomonori Cc: Matteo Frigo Cc: # .32.x and older LKML-Reference: <4AE5B52A.4050408@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index ce2fb91..839d49a 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -216,7 +216,7 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "allowdac", 8)) forbid_dac = 0; if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; + forbid_dac = 1; if (!strncmp(p, "usedac", 6)) { forbid_dac = -1; return 1; -- cgit v0.10.2 From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebase the implementation of the breakpoints API on top of perf events instances. Each breakpoints are now perf events that handle the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons of this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in the order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression have been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD doesn't anymore cover every cases of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. - Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt diff --git a/arch/Kconfig b/arch/Kconfig index acb66439..eef3bbb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES config HAVE_HW_BREAKPOINT bool + depends on HAVE_PERF_EVENTS + select ANON_INODES + select PERF_EVENTS source "kernel/gcov/Kconfig" diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80c..9f828f8 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,6 +10,7 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h +header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 23439fb..9a3333c 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,13 +75,8 @@ */ #ifdef __KERNEL__ -/* For process management */ -extern void flush_thread_hw_breakpoint(struct task_struct *tsk); -extern int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags); +DECLARE_PER_CPU(unsigned long, dr7); -/* For CPU management */ -extern void load_debug_registers(void); static inline void hw_breakpoint_disable(void) { /* Zero the control register for HW Breakpoint */ @@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +#ifdef CONFIG_KVM +extern void hw_breakpoint_restore(void); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 3cfca8e..0675a7c 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -4,6 +4,11 @@ #ifdef __KERNEL__ #define __ARCH_HW_BREAKPOINT_H +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ struct arch_hw_breakpoint { char *name; /* Contains name of the symbol to set bkpt */ unsigned long address; @@ -12,44 +17,57 @@ struct arch_hw_breakpoint { }; #include -#include +#include +#include /* Available HW breakpoint length encodings */ -#define HW_BREAKPOINT_LEN_1 0x40 -#define HW_BREAKPOINT_LEN_2 0x44 -#define HW_BREAKPOINT_LEN_4 0x4c -#define HW_BREAKPOINT_LEN_EXECUTE 0x40 +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c +#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 -#define HW_BREAKPOINT_LEN_8 0x48 +#define X86_BREAKPOINT_LEN_8 0x48 #endif /* Available HW breakpoint type encodings */ /* trigger on instruction execute */ -#define HW_BREAKPOINT_EXECUTE 0x80 +#define X86_BREAKPOINT_EXECUTE 0x80 /* trigger on memory write */ -#define HW_BREAKPOINT_WRITE 0x81 +#define X86_BREAKPOINT_WRITE 0x81 /* trigger on memory read or write */ -#define HW_BREAKPOINT_RW 0x83 +#define X86_BREAKPOINT_RW 0x83 /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; -DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); -extern unsigned int hbp_user_refcount[HBP_NUM]; +struct perf_event; +struct pmu; -extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_uninstall_thread_hw_breakpoint(void); extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); -extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk); -extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); -extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_update_kernel_hw_breakpoint(void *); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, - unsigned long val, void *data); + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + #endif /* __KERNEL__ */ #endif /* _I386_HW_BREAKPOINT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 61aafb7..820f300 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,6 +423,8 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; +struct perf_event; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -444,12 +446,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg[HBP_NUM]; - unsigned long debugreg6; - unsigned long debugreg7; - /* Hardware breakpoint info */ - struct hw_breakpoint *hbp[HBP_NUM]; + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... */ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 9316a9d..e622620 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) 2009 IBM Corporation + * Copyright (C) 2009 Frederic Weisbecker */ /* @@ -22,6 +23,8 @@ * using the CPU's debug registers. */ +#include +#include #include #include #include @@ -38,26 +41,24 @@ #include #include -/* Unmasked kernel DR7 value */ -static unsigned long kdr7; +/* Per cpu debug control register value */ +DEFINE_PER_CPU(unsigned long, dr7); + +/* Per cpu debug address registers values */ +static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); /* - * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. - * Used to clear and verify the status of bits corresponding to DR0 - DR3 + * Stores the breakpoints currently in use on each breakpoint address + * register for each cpus */ -static const unsigned long dr7_masks[HBP_NUM] = { - 0x000f0003, /* LEN0, R/W0, G0, L0 */ - 0x00f0000c, /* LEN1, R/W1, G1, L1 */ - 0x0f000030, /* LEN2, R/W2, G2, L2 */ - 0xf00000c0 /* LEN3, R/W3, G3, L3 */ -}; +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); /* * Encode the length, type, Exact, and Enable bits for a particular breakpoint * as stored in debug register 7. */ -static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) { unsigned long bp_info; @@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) return bp_info; } -void arch_update_kernel_hw_breakpoint(void *unused) +/* + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. + */ +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) { - struct hw_breakpoint *bp; - int i, cpu = get_cpu(); - unsigned long temp_kdr7 = 0; - - /* Don't allow debug exceptions while we update the registers */ - set_debugreg(0UL, 7); + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - for (i = hbp_kernel_pos; i < HBP_NUM; i++) { - per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; - if (bp) { - temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); - set_debugreg(bp->info.address, i); - } - } + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; - /* No need to set DR6. Update the debug registers with kernel-space - * breakpoint values from kdr7 and user-space requests from the - * current process - */ - kdr7 = temp_kdr7; - set_debugreg(kdr7 | current->thread.debugreg7, 7); - put_cpu(); + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } /* - * Install the thread breakpoints in their debug registers. + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. Eventually we enable it in the debug control register. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. */ -void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - - switch (hbp_kernel_pos) { - case 4: - set_debugreg(thread->debugreg[3], 3); - case 3: - set_debugreg(thread->debugreg[2], 2); - case 2: - set_debugreg(thread->debugreg[1], 1); - case 1: - set_debugreg(thread->debugreg[0], 0); - default: - break; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (!*slot) { + *slot = bp; + break; + } } - /* No need to set DR6 */ - set_debugreg((kdr7 | thread->debugreg7), 7); + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return -EBUSY; + + set_debugreg(info->address, i); + __get_cpu_var(cpu_debugreg[i]) = info->address; + + dr7 = &__get_cpu_var(dr7); + *dr7 |= encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); + + return 0; } /* - * Install the debug register values for just the kernel, no thread. + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. */ -void arch_uninstall_thread_hw_breakpoint(void) +void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - /* Clear the user-space portion of debugreg7 by setting only kdr7 */ - set_debugreg(kdr7, 7); + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return; + dr7 = &__get_cpu_var(dr7); + *dr7 &= ~encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); } static int get_hbp_len(u8 hbp_len) @@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len) unsigned int len_in_bytes = 0; switch (hbp_len) { - case HW_BREAKPOINT_LEN_1: + case X86_BREAKPOINT_LEN_1: len_in_bytes = 1; break; - case HW_BREAKPOINT_LEN_2: + case X86_BREAKPOINT_LEN_2: len_in_bytes = 2; break; - case HW_BREAKPOINT_LEN_4: + case X86_BREAKPOINT_LEN_4: len_in_bytes = 4; break; #ifdef CONFIG_X86_64 - case HW_BREAKPOINT_LEN_8: + case X86_BREAKPOINT_LEN_8: len_in_bytes = 8; break; #endif @@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) /* * Store a breakpoint's encoded address, length, and type. */ -static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +static int arch_store_info(struct perf_event *bp) { - /* - * User-space requests will always have the address field populated - * Symbol names from user-space are rejected - */ - if (tsk && bp->info.name) - return -EINVAL; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); /* * For kernel-addresses, either the address or symbol name can be * specified. */ - if (bp->info.name) - bp->info.address = (unsigned long) - kallsyms_lookup_name(bp->info.name); - if (bp->info.address) + if (info->name) + info->address = (unsigned long) + kallsyms_lookup_name(info->name); + if (info->address) return 0; + return -EINVAL; } -/* - * Validate the arch-specific HW Breakpoint register settings - */ -int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk) +int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type) { - unsigned int align; - int ret = -EINVAL; + /* Len */ + switch (x86_len) { + case X86_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case X86_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case X86_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } - switch (bp->info.type) { - /* - * Ptrace-refactoring code - * For now, we'll allow instruction breakpoint only for user-space - * addresses - */ - case HW_BREAKPOINT_EXECUTE: - if ((!arch_check_va_in_userspace(bp->info.address, - bp->info.len)) && - bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) - return ret; + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; break; - case HW_BREAKPOINT_WRITE: + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; - case HW_BREAKPOINT_RW: + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; break; default: - return ret; + return -EINVAL; } - switch (bp->info.len) { + return 0; +} + + +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + info->address = bp->attr.bp_addr; + + /* Len */ + switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: - align = 0; + info->len = X86_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - align = 1; + info->len = X86_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_4: - align = 3; + info->len = X86_BREAKPOINT_LEN_4; break; #ifdef CONFIG_X86_64 case HW_BREAKPOINT_LEN_8: + info->len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + break; + default: + return -EINVAL; + } + + return 0; +} +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned int align; + int ret; + + + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + ret = -EINVAL; + + if (info->type == X86_BREAKPOINT_EXECUTE) + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + if ((!arch_check_va_in_userspace(info->address, info->len)) && + info->len != X86_BREAKPOINT_EXECUTE) + return ret; + + switch (info->len) { + case X86_BREAKPOINT_LEN_1: + align = 0; + break; + case X86_BREAKPOINT_LEN_2: + align = 1; + break; + case X86_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: align = 7; break; #endif @@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, return ret; } - if (bp->triggered) - ret = arch_store_info(bp, tsk); + if (bp->callback) + ret = arch_store_info(bp); if (ret < 0) return ret; @@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, * Check that the low-order bits of the address are appropriate * for the alignment implied by len. */ - if (bp->info.address & align) + if (info->address & align) return -EINVAL; /* Check that the virtual address is in the proper range */ if (tsk) { - if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + if (!arch_check_va_in_userspace(info->address, info->len)) return -EFAULT; } else { - if (!arch_check_va_in_kernelspace(bp->info.address, - bp->info.len)) + if (!arch_check_va_in_kernelspace(info->address, info->len)) return -EFAULT; } + return 0; } -void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { - struct thread_struct *thread = &(tsk->thread); - struct hw_breakpoint *bp = thread->hbp[pos]; - - thread->debugreg7 &= ~dr7_masks[pos]; - if (bp) { - thread->debugreg[pos] = bp->info.address; - thread->debugreg7 |= encode_dr7(pos, bp->info.len, - bp->info.type); - } else - thread->debugreg[pos] = 0; + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < HBP_NUM; i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } } -void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +#ifdef CONFIG_KVM +void hw_breakpoint_restore(void) { - int i; - struct thread_struct *thread = &(tsk->thread); - - thread->debugreg7 = 0; - for (i = 0; i < HBP_NUM; i++) - thread->debugreg[i] = 0; + set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); + set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); + set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); + set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(__get_cpu_var(dr7), 7); } +EXPORT_SYMBOL_GPL(hw_breakpoint_restore); +#endif /* * Handle debug exception notifications. @@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; - struct hw_breakpoint *bp; + struct perf_event *bp; unsigned long dr7, dr6; unsigned long *dr6_p; @@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; - /* Lazy debug register switching */ - if (!test_tsk_thread_flag(current, TIF_DEBUG)) - arch_uninstall_thread_hw_breakpoint(); - get_debugreg(dr7, 7); /* Disable breakpoints during exception handling */ set_debugreg(0UL, 7); @@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) for (i = 0; i < HBP_NUM; ++i) { if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; + /* - * Find the corresponding hw_breakpoint structure and - * invoke its triggered callback. + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. */ - if (i >= hbp_kernel_pos) - bp = per_cpu(this_hbp_kernel[i], cpu); - else { - bp = current->thread.hbp[i]; - if (bp) - rc = NOTIFY_DONE; - } + rcu_read_lock(); + + bp = per_cpu(bp_per_reg[i], cpu); + if (bp) + rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) (*dr6_p) &= ~(DR_TRAP0 << i); /* * bp can be NULL due to lazy debug register switching - * or due to the delay between updates of hbp_kernel_pos - * and this_hbp_kernel. + * or due to concurrent perf counter removing. */ - if (!bp) - continue; + if (!bp) { + rcu_read_unlock(); + break; + } + + (bp->callback)(bp, args->regs); - (bp->triggered)(bp, args->regs); + rcu_read_unlock(); } if (dr6 & (~DR_TRAP_BITS)) rc = NOTIFY_DONE; set_debugreg(dr7, 7); put_cpu(); + return rc; } @@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify( return hw_breakpoint_handler(data); } + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} + +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cf8ee00..744508e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - flush_thread_hw_breakpoint(tsk); WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } @@ -107,8 +105,7 @@ void flush_thread(void) } #endif - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - flush_thread_hw_breakpoint(tsk); + flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 209e748..d5bd313 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,7 +59,6 @@ #include #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) - goto out; + + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); -out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - if (err) - flush_thread_hw_breakpoint(p); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; @@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 72edac0..5bafdec 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -53,7 +53,6 @@ #include #include #include -#include asmlinkage extern void ret_from_fork(void); @@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task) BUG(); } } - if (unlikely(dead_task->thread.debugreg7)) - flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, savesegment(ds, p->thread.ds); err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(me, p, clone_flags)) - goto out; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -351,8 +346,6 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - if (err) - flush_thread_hw_breakpoint(p); return err; } @@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 267cb85..e79610d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target, return ret; } -/* - * Decode the length and type bits for a particular breakpoint as - * stored in debug register 7. Return the "enabled" status. - */ -static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, - unsigned *type) -{ - int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - - *len = (bp_info & 0xc) | 0x40; - *type = (bp_info & 0x3) | 0x80; - return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; -} - -static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) +static void ptrace_triggered(struct perf_event *bp, void *data) { - struct thread_struct *thread = &(current->thread); int i; + struct thread_struct *thread = &(current->thread); /* * Store in the virtual DR6 register the fact that the breakpoint * was hit so the thread's debugger will see it. */ - for (i = 0; i < hbp_kernel_pos; i++) - /* - * We will check bp->info.address against the address stored in - * thread's hbp structure and not debugreg[i]. This is to ensure - * that the corresponding bit for 'i' in DR7 register is enabled - */ - if (bp->info.address == thread->hbp[i]->info.address) + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) break; + } thread->debugreg6 |= (DR_TRAP0 << i); } /* + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * + */ +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) +{ + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; + + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } + } + + return dr7; +} + +/* * Handle ptrace writes to debug register 7. */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { struct thread_struct *thread = &(tsk->thread); - unsigned long old_dr7 = thread->debugreg7; + unsigned long old_dr7; int i, orig_ret = 0, rc = 0; int enabled, second_pass = 0; unsigned len, type; - struct hw_breakpoint *bp; + int gen_len, gen_type; + struct perf_event *bp; data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); restore: /* * Loop through all the hardware breakpoints, making the @@ -496,11 +503,12 @@ restore: */ for (i = 0; i < HBP_NUM; i++) { enabled = decode_dr7(data, i, &len, &type); - bp = thread->hbp[i]; + bp = thread->ptrace_bps[i]; if (!enabled) { if (bp) { - /* Don't unregister the breakpoints right-away, + /* + * Don't unregister the breakpoints right-away, * unless all register_user_hw_breakpoint() * requests have succeeded. This prevents * any window of opportunity for debug @@ -508,27 +516,45 @@ restore: */ if (!second_pass) continue; - unregister_user_hw_breakpoint(tsk, bp); - kfree(bp); + thread->ptrace_bps[i] = NULL; + unregister_hw_breakpoint(bp); } continue; } + + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ if (!bp) { - rc = -ENOMEM; - bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (bp) { - bp->info.address = thread->debugreg[i]; - bp->triggered = ptrace_triggered; - bp->info.len = len; - bp->info.type = type; - rc = register_user_hw_breakpoint(tsk, bp); - if (rc) - kfree(bp); - } - } else - rc = modify_user_hw_breakpoint(tsk, bp); + rc = -EINVAL; + break; + } + + rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); if (rc) break; + + /* + * This is a temporary thing as bp is unregistered/registered + * to simulate modification + */ + bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, + gen_type, bp->callback, + tsk, true); + thread->ptrace_bps[i] = NULL; + + if (!bp) { /* incorrect bp, or we have a bug in bp API */ + rc = -EINVAL; + break; + } + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + bp = NULL; + break; + } + thread->ptrace_bps[i] = bp; } /* * Make a second pass to free the remaining unused breakpoints @@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; - if (n < HBP_NUM) - val = thread->debugreg[n]; - else if (n == 6) + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { val = thread->debugreg6; - else if (n == 7) - val = thread->debugreg7; + } else if (n == 7) { + val = ptrace_get_dr7(thread->ptrace_bps); + } return val; } +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + + if (!t->ptrace_bps[nr]) { + /* + * Put stub len and type to register (reserve) an inactive but + * correct bp + */ + bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, + HW_BREAKPOINT_W, + ptrace_triggered, tsk, + false); + } else { + bp = t->ptrace_bps[nr]; + t->ptrace_bps[nr] = NULL; + bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, + bp->attr.bp_type, + bp->callback, + tsk, + bp->attr.disabled); + } + + if (!bp) + return -EIO; + /* + * CHECKME: the previous code returned -EIO if the addr wasn't a + * valid task virtual addr. The new one will return -EINVAL in this + * case. + * -EINVAL may be what we want for in-kernel breakpoints users, but + * -EIO looks better for ptrace, since we refuse a register writing + * for the user. And anyway this is the previous behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; + + return 0; +} + /* * Handle PTRACE_POKEUSR calls for the debug register area. */ @@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) return -EIO; if (n == 6) { - tsk->thread.debugreg6 = val; + thread->debugreg6 = val; goto ret_path; } if (n < HBP_NUM) { - if (thread->hbp[n]) { - if (arch_check_va_in_userspace(val, - thread->hbp[n]->info.len) == 0) { - rc = -EIO; - goto ret_path; - } - thread->hbp[n]->info.address = val; - } - thread->debugreg[n] = val; + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; } /* All that's left is DR7 */ if (n == 7) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 213a7a3..565ebc6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include @@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused) x86_cpuinit.setup_percpu_clockev(); wmb(); - load_debug_registers(); cpu_idle(); } @@ -1269,7 +1267,6 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); - hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc2974a..22dee7a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -42,6 +42,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" +#include #include #include #include @@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); - if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } + /* + * If the guest has used debug registers, at least dr7 + * will be disabled while returning to the host. + * If we don't have active breakpoints in the host, we don't + * care about the messed up debug address registers. But if + * we have some of them active, restore the old state. + */ + if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK) + hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index e09a44f..0a979f3 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); #endif - hw_breakpoint_disable(); } /* Needed by apm.c */ @@ -144,11 +143,6 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - load_debug_registers(); } /** diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 61ccc8f..7eba9b9 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,136 +1,131 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H +#include -#ifdef __KERNEL__ -#include -#include -#include - -/** - * struct hw_breakpoint - unified kernel/user-space hardware breakpoint - * @triggered: callback invoked after target address access - * @info: arch-specific breakpoint info (address, length, and type) - * - * %hw_breakpoint structures are the kernel's way of representing - * hardware breakpoints. These are data breakpoints - * (also known as "watchpoints", triggered on data access), and the breakpoint's - * target address can be located in either kernel space or user space. - * - * The breakpoint's address, length, and type are highly - * architecture-specific. The values are encoded in the @info field; you - * specify them when registering the breakpoint. To examine the encoded - * values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared - * below. - * - * The address is specified as a regular kernel pointer (for kernel-space - * breakponts) or as an %__user pointer (for user-space breakpoints). - * With register_user_hw_breakpoint(), the address must refer to a - * location in user space. The breakpoint will be active only while the - * requested task is running. Conversely with - * register_kernel_hw_breakpoint(), the address must refer to a location - * in kernel space, and the breakpoint will be active on all CPUs - * regardless of the current task. - * - * The length is the breakpoint's extent in bytes, which is subject to - * certain limitations. include/asm/hw_breakpoint.h contains macros - * defining the available lengths for a specific architecture. Note that - * the address's alignment must match the length. The breakpoint will - * catch accesses to any byte in the range from address to address + - * (length - 1). - * - * The breakpoint's type indicates the sort of access that will cause it - * to trigger. Possible values may include: - * - * %HW_BREAKPOINT_RW (triggered on read or write access), - * %HW_BREAKPOINT_WRITE (triggered on write access), and - * %HW_BREAKPOINT_READ (triggered on read access). - * - * Appropriate macros are defined in include/asm/hw_breakpoint.h; not all - * possibilities are available on all architectures. Execute breakpoints - * must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE. - * - * When a breakpoint gets hit, the @triggered callback is - * invoked in_interrupt with a pointer to the %hw_breakpoint structure and the - * processor registers. - * Data breakpoints occur after the memory access has taken place. - * Breakpoints are disabled during execution @triggered, to avoid - * recursive traps and allow unhindered access to breakpointed memory. - * - * This sample code sets a breakpoint on pid_max and registers a callback - * function for writes to that variable. Note that it is not portable - * as written, because not all architectures support HW_BREAKPOINT_LEN_4. - * - * ---------------------------------------------------------------------- - * - * #include - * - * struct hw_breakpoint my_bp; - * - * static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) - * { - * printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n"); - * dump_stack(); - * ............... - * } - * - * static struct hw_breakpoint my_bp; - * - * static int init_module(void) - * { - * ...................... - * my_bp.info.type = HW_BREAKPOINT_WRITE; - * my_bp.info.len = HW_BREAKPOINT_LEN_4; - * - * my_bp.installed = (void *)my_bp_installed; - * - * rc = register_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * static void cleanup_module(void) - * { - * ...................... - * unregister_kernel_hw_breakpoint(&my_bp); - * ...................... - * } - * - * ---------------------------------------------------------------------- - */ -struct hw_breakpoint { - void (*triggered)(struct hw_breakpoint *, struct pt_regs *); - struct arch_hw_breakpoint info; +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, }; -/* - * len and type values are defined in include/asm/hw_breakpoint.h. - * Available values vary according to the architecture. On i386 the - * possibilities are: - * - * HW_BREAKPOINT_LEN_1 - * HW_BREAKPOINT_LEN_2 - * HW_BREAKPOINT_LEN_4 - * HW_BREAKPOINT_RW - * HW_BREAKPOINT_READ - * - * On other architectures HW_BREAKPOINT_LEN_8 may be available, and the - * 1-, 2-, and 4-byte lengths may be unavailable. There also may be - * HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. - */ +enum { + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_X = 4, +}; + +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return &bp->hw.info; +} + +static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) +{ + return bp->attr.bp_addr; +} + +static inline int hw_breakpoint_type(struct perf_event *bp) +{ + return bp->attr.bp_type; +} + +static inline int hw_breakpoint_len(struct perf_event *bp) +{ + return bp->attr.bp_len; +} + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active); + +/* FIXME: only change from the attr, and don't unregister */ +extern struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active); -extern int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern int modify_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); -extern void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp); /* * Kernel breakpoints are not associated with any particular thread. */ -extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); -extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); +extern struct perf_event * +register_wide_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active); + +extern struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active); + +extern int register_perf_hw_breakpoint(struct perf_event *bp); +extern int __register_perf_hw_breakpoint(struct perf_event *bp); +extern void unregister_hw_breakpoint(struct perf_event *bp); +extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); + +extern int reserve_bp_slot(struct perf_event *bp); +extern void release_bp_slot(struct perf_event *bp); + +extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); + +#else /* !CONFIG_HAVE_HW_BREAKPOINT */ + +static inline struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { return NULL; } +static inline struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { return NULL; } +static inline struct perf_event * +register_wide_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active) { return NULL; } +static inline struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active) { return NULL; } +static inline int +register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline int +__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } +static inline void unregister_hw_breakpoint(struct perf_event *bp) { } +static inline void +unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { } +static inline int +reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } +static inline void release_bp_slot(struct perf_event *bp) { } + +static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } -extern unsigned int hbp_kernel_pos; +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ -#endif /* _LINUX_HW_BREAKPOINT_H */ +#endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8d54e6d..cead64e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -18,6 +18,10 @@ #include #include +#ifdef CONFIG_HAVE_HW_BREAKPOINT +#include +#endif + /* * User-space ABI bits: */ @@ -31,6 +35,7 @@ enum perf_type_id { PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, PERF_TYPE_MAX, /* non-ABI */ }; @@ -207,6 +212,15 @@ struct perf_event_attr { __u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; + + union { + struct { /* Hardware breakpoint info */ + __u64 bp_addr; + __u32 bp_type; + __u32 bp_len; + }; + }; + __u32 __reserved_2; __u64 __reserved_3; @@ -476,6 +490,11 @@ struct hw_perf_event { atomic64_t count; struct hrtimer hrtimer; }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + union { /* breakpoint */ + struct arch_hw_breakpoint info; + }; +#endif }; atomic64_t prev_count; u64 sample_period; @@ -588,7 +607,7 @@ struct perf_event { u64 tstamp_running; u64 tstamp_stopped; - struct perf_event_attr attr; + struct perf_event_attr attr; struct hw_perf_event hw; struct perf_event_context *ctx; @@ -643,6 +662,8 @@ struct perf_event { perf_callback_t callback; + perf_callback_t event_callback; + #endif /* CONFIG_PERF_EVENTS */ }; @@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); +extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ @@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } +static inline void +perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } diff --git a/kernel/exit.c b/kernel/exit.c index e61891f..266f892 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -980,6 +981,10 @@ NORET_TYPE void do_exit(long code) proc_exit_connector(tsk); /* + * FIXME: do that only when needed, using sched_exit tracepoint + */ + flush_ptrace_hw_breakpoint(tsk); + /* * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. */ diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c1f64e6..08f6d01 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 + * Copyright (C) 2009, Frederic Weisbecker */ /* @@ -35,334 +36,242 @@ #include #include -#include +#include + #include #ifdef CONFIG_X86 #include #endif -/* - * Spinlock that protects all (un)register operations over kernel/user-space - * breakpoint requests - */ -static DEFINE_SPINLOCK(hw_breakpoint_lock); - -/* Array of kernel-space breakpoint structures */ -struct hw_breakpoint *hbp_kernel[HBP_NUM]; - -/* - * Per-processor copy of hbp_kernel[]. Used only when hbp_kernel is being - * modified but we need the older copy to handle any hbp exceptions. It will - * sync with hbp_kernel[] value after updation is done through IPIs. - */ -DEFINE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); - -/* - * Kernel breakpoints grow downwards, starting from HBP_NUM - * 'hbp_kernel_pos' denotes lowest numbered breakpoint register occupied for - * kernel-space request. We will initialise it here and not in an __init - * routine because load_debug_registers(), which uses this variable can be - * called very early during CPU initialisation. - */ -unsigned int hbp_kernel_pos = HBP_NUM; -/* - * An array containing refcount of threads using a given bkpt register - * Accesses are synchronised by acquiring hw_breakpoint_lock - */ -unsigned int hbp_user_refcount[HBP_NUM]; +static atomic_t bp_slot; -/* - * Load the debug registers during startup of a CPU. - */ -void load_debug_registers(void) +int reserve_bp_slot(struct perf_event *bp) { - unsigned long flags; - struct task_struct *tsk = current; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Prevent IPIs for new kernel breakpoint updates */ - local_irq_save(flags); - arch_update_kernel_hw_breakpoint(NULL); - local_irq_restore(flags); - - if (test_tsk_thread_flag(tsk, TIF_DEBUG)) - arch_install_thread_hw_breakpoint(tsk); - - spin_unlock_bh(&hw_breakpoint_lock); -} + if (atomic_inc_return(&bp_slot) == HBP_NUM) { + atomic_dec(&bp_slot); -/* - * Erase all the hardware breakpoint info associated with a thread. - * - * If tsk != current then tsk must not be usable (for example, a - * child being cleaned up from a failed fork). - */ -void flush_thread_hw_breakpoint(struct task_struct *tsk) -{ - int i; - struct thread_struct *thread = &(tsk->thread); - - spin_lock_bh(&hw_breakpoint_lock); - - /* The thread no longer has any breakpoints associated with it */ - clear_tsk_thread_flag(tsk, TIF_DEBUG); - for (i = 0; i < HBP_NUM; i++) { - if (thread->hbp[i]) { - hbp_user_refcount[i]--; - kfree(thread->hbp[i]); - thread->hbp[i] = NULL; - } + return -ENOSPC; } - arch_flush_thread_hw_breakpoint(tsk); - - /* Actually uninstall the breakpoints if necessary */ - if (tsk == current) - arch_uninstall_thread_hw_breakpoint(); - spin_unlock_bh(&hw_breakpoint_lock); + return 0; } -/* - * Copy the hardware breakpoint info from a thread to its cloned child. - */ -int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags) +void release_bp_slot(struct perf_event *bp) { - /* - * We will assume that breakpoint settings are not inherited - * and the child starts out with no debug registers set. - * But what about CLONE_PTRACE? - */ - clear_tsk_thread_flag(child, TIF_DEBUG); - - /* We will call flush routine since the debugregs are not inherited */ - arch_flush_thread_hw_breakpoint(child); - - return 0; + atomic_dec(&bp_slot); } -static int __register_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +int __register_perf_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - int rc; + int ret; - /* Do not overcommit. Fail if kernel has used the hbp registers */ - if (pos >= hbp_kernel_pos) - return -ENOSPC; + ret = reserve_bp_slot(bp); + if (ret) + return ret; - rc = arch_validate_hwbkpt_settings(bp, tsk); - if (rc) - return rc; + if (!bp->attr.disabled) + ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); - thread->hbp[pos] = bp; - hbp_user_refcount[pos]++; + return ret; +} - arch_update_user_hw_breakpoint(pos, tsk); - /* - * Does it need to be installed right now? - * Otherwise it will get installed the next time tsk runs - */ - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); +int register_perf_hw_breakpoint(struct perf_event *bp) +{ + bp->callback = perf_bp_event; - return rc; + return __register_perf_hw_breakpoint(bp); } /* - * Modify the address of a hbp register already in use by the task - * Do not invoke this in-lieu of a __unregister_user_hw_breakpoint() + * Register a breakpoint bound to a task and a given cpu. + * If cpu is -1, the breakpoint is active for the task in every cpu + * If the task is -1, the breakpoint is active for every tasks in the given + * cpu. */ -static int __modify_user_hw_breakpoint(int pos, struct task_struct *tsk, - struct hw_breakpoint *bp) +static struct perf_event * +register_user_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + pid_t pid, + int cpu, + bool active) { - struct thread_struct *thread = &(tsk->thread); - - if ((pos >= hbp_kernel_pos) || (arch_validate_hwbkpt_settings(bp, tsk))) - return -EINVAL; - - if (thread->hbp[pos] == NULL) - return -EINVAL; - - thread->hbp[pos] = bp; + struct perf_event_attr *attr; + struct perf_event *bp; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return ERR_PTR(-ENOMEM); + + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->bp_addr = addr; + attr->bp_len = len; + attr->bp_type = type; /* - * 'pos' must be that of a hbp register already used by 'tsk' - * Otherwise arch_modify_user_hw_breakpoint() will fail + * Such breakpoints are used by debuggers to trigger signals when + * we hit the excepted memory op. We can't miss such events, they + * must be pinned. */ - arch_update_user_hw_breakpoint(pos, tsk); + attr->pinned = 1; - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + if (!active) + attr->disabled = 1; - return 0; -} - -static void __unregister_user_hw_breakpoint(int pos, struct task_struct *tsk) -{ - hbp_user_refcount[pos]--; - tsk->thread.hbp[pos] = NULL; + bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); + kfree(attr); - arch_update_user_hw_breakpoint(pos, tsk); - - if (tsk == current) - arch_install_thread_hw_breakpoint(tsk); + return bp; } /** * register_user_hw_breakpoint - register a hardware breakpoint for user space + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @bp: the breakpoint structure to register - * - * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and - * @bp->triggered must be set properly before invocation + * @active: should we activate it while registering it * */ -int register_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +struct perf_event * +register_user_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { - struct thread_struct *thread = &(tsk->thread); - int i, rc = -ENOSPC; - - spin_lock_bh(&hw_breakpoint_lock); - - for (i = 0; i < hbp_kernel_pos; i++) { - if (!thread->hbp[i]) { - rc = __register_user_hw_breakpoint(i, tsk, bp); - break; - } - } - if (!rc) - set_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return register_user_hw_breakpoint_cpu(addr, len, type, triggered, + tsk->pid, -1, active); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint + * @bp: the breakpoint structure to modify + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @bp: the breakpoint structure to unregister - * + * @active: should we activate it while registering it */ -int modify_user_hw_breakpoint(struct task_struct *tsk, struct hw_breakpoint *bp) +struct perf_event * +modify_user_hw_breakpoint(struct perf_event *bp, + unsigned long addr, + int len, + int type, + perf_callback_t triggered, + struct task_struct *tsk, + bool active) { - struct thread_struct *thread = &(tsk->thread); - int i, ret = -ENOENT; + /* + * FIXME: do it without unregistering + * - We don't want to lose our slot + * - If the new bp is incorrect, don't lose the older one + */ + unregister_hw_breakpoint(bp); - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (bp == thread->hbp[i]) { - ret = __modify_user_hw_breakpoint(i, tsk, bp); - break; - } - } - spin_unlock_bh(&hw_breakpoint_lock); - return ret; + return register_user_hw_breakpoint(addr, len, type, triggered, + tsk, active); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); /** - * unregister_user_hw_breakpoint - unregister a user-space hardware breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs + * unregister_hw_breakpoint - unregister a user-space hardware breakpoint * @bp: the breakpoint structure to unregister - * */ -void unregister_user_hw_breakpoint(struct task_struct *tsk, - struct hw_breakpoint *bp) +void unregister_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - int i, pos = -1, hbp_counter = 0; - - spin_lock_bh(&hw_breakpoint_lock); - for (i = 0; i < hbp_kernel_pos; i++) { - if (thread->hbp[i]) - hbp_counter++; - if (bp == thread->hbp[i]) - pos = i; - } - if (pos >= 0) { - __unregister_user_hw_breakpoint(pos, tsk); - hbp_counter--; - } - if (!hbp_counter) - clear_tsk_thread_flag(tsk, TIF_DEBUG); - - spin_unlock_bh(&hw_breakpoint_lock); + if (!bp) + return; + perf_event_release_kernel(bp); +} +EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); + +static struct perf_event * +register_kernel_hw_breakpoint_cpu(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + int cpu, + bool active) +{ + return register_user_hw_breakpoint_cpu(addr, len, type, triggered, + -1, cpu, active); } -EXPORT_SYMBOL_GPL(unregister_user_hw_breakpoint); /** - * register_kernel_hw_breakpoint - register a hardware breakpoint for kernel space - * @bp: the breakpoint structure to register - * - * @bp.info->name or @bp.info->address, @bp.info->len, @bp.info->type and - * @bp->triggered must be set properly before invocation + * register_wide_hw_breakpoint - register a wide breakpoint in the kernel + * @addr: is the memory address that triggers the breakpoint + * @len: the length of the access to the memory (1 byte, 2 bytes etc...) + * @type: the type of the access to the memory (read/write/exec) + * @triggered: callback to trigger when we hit the breakpoint + * @active: should we activate it while registering it * + * @return a set of per_cpu pointers to perf events */ -int register_kernel_hw_breakpoint(struct hw_breakpoint *bp) +struct perf_event ** +register_wide_hw_breakpoint(unsigned long addr, + int len, + int type, + perf_callback_t triggered, + bool active) { - int rc; + struct perf_event **cpu_events, **pevent, *bp; + long err; + int cpu; + + cpu_events = alloc_percpu(typeof(*cpu_events)); + if (!cpu_events) + return ERR_PTR(-ENOMEM); - rc = arch_validate_hwbkpt_settings(bp, NULL); - if (rc) - return rc; + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + bp = register_kernel_hw_breakpoint_cpu(addr, len, type, + triggered, cpu, active); - spin_lock_bh(&hw_breakpoint_lock); + *pevent = bp; - rc = -ENOSPC; - /* Check if we are over-committing */ - if ((hbp_kernel_pos > 0) && (!hbp_user_refcount[hbp_kernel_pos-1])) { - hbp_kernel_pos--; - hbp_kernel[hbp_kernel_pos] = bp; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - rc = 0; + if (IS_ERR(bp) || !bp) { + err = PTR_ERR(bp); + goto fail; + } } - spin_unlock_bh(&hw_breakpoint_lock); - return rc; + return cpu_events; + +fail: + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + if (IS_ERR(*pevent) || !*pevent) + break; + unregister_hw_breakpoint(*pevent); + } + free_percpu(cpu_events); + /* return the error if any */ + return ERR_PTR(err); } -EXPORT_SYMBOL_GPL(register_kernel_hw_breakpoint); /** - * unregister_kernel_hw_breakpoint - unregister a HW breakpoint for kernel space - * @bp: the breakpoint structure to unregister - * - * Uninstalls and unregisters @bp. + * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel + * @cpu_events: the per cpu set of events to unregister */ -void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp) +void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { - int i, j; - - spin_lock_bh(&hw_breakpoint_lock); - - /* Find the 'bp' in our list of breakpoints for kernel */ - for (i = hbp_kernel_pos; i < HBP_NUM; i++) - if (bp == hbp_kernel[i]) - break; + int cpu; + struct perf_event **pevent; - /* Check if we did not find a match for 'bp'. If so return early */ - if (i == HBP_NUM) { - spin_unlock_bh(&hw_breakpoint_lock); - return; + for_each_possible_cpu(cpu) { + pevent = per_cpu_ptr(cpu_events, cpu); + unregister_hw_breakpoint(*pevent); } - - /* - * We'll shift the breakpoints one-level above to compact if - * unregistration creates a hole - */ - for (j = i; j > hbp_kernel_pos; j--) - hbp_kernel[j] = hbp_kernel[j-1]; - - hbp_kernel[hbp_kernel_pos] = NULL; - on_each_cpu(arch_update_kernel_hw_breakpoint, NULL, 1); - hbp_kernel_pos++; - - spin_unlock_bh(&hw_breakpoint_lock); + free_percpu(cpu_events); } -EXPORT_SYMBOL_GPL(unregister_kernel_hw_breakpoint); + static struct notifier_block hw_breakpoint_exceptions_nb = { .notifier_call = hw_breakpoint_exceptions_notify, @@ -374,5 +283,12 @@ static int __init init_hw_breakpoint(void) { return register_die_notifier(&hw_breakpoint_exceptions_nb); } - core_initcall(init_hw_breakpoint); + + +struct pmu perf_ops_bp = { + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, + .unthrottle = hw_breakpoint_pmu_unthrottle +}; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 5087125..98dc56b 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_PROFILE */ +#ifdef CONFIG_HAVE_HW_BREAKPOINT +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + int err; + /* + * The breakpoint is already filled if we haven't created the counter + * through perf syscall + * FIXME: manage to get trigerred to NULL if it comes from syscalls + */ + if (!bp->callback) + err = register_perf_hw_breakpoint(bp); + else + err = __register_perf_hw_breakpoint(bp); + if (err) + return ERR_PTR(err); + + bp->destroy = bp_perf_event_destroy; + + return &perf_ops_bp; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ + /* TODO */ +} +#else +static void bp_perf_event_destroy(struct perf_event *event) +{ +} + +static const struct pmu *bp_perf_event_init(struct perf_event *bp) +{ + return NULL; +} + +void perf_bp_event(struct perf_event *bp, void *regs) +{ +} +#endif + atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) @@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr, pmu = tp_perf_event_init(event); break; + case PERF_TYPE_BREAKPOINT: + pmu = bp_perf_event_init(event); + break; + + default: break; } @@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, ctx = find_get_context(pid, cpu); if (IS_ERR(ctx)) - return NULL ; + return NULL; event = perf_event_alloc(attr, cpu, ctx, NULL, NULL, callback, GFP_KERNEL); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 91c3d0e..d72f06f 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -11,14 +11,11 @@ #include #include #include +#include #include #include -#ifdef CONFIG_KSYM_TRACER -#include -#endif - enum trace_type { __TRACE_FIRST_TYPE = 0, diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e19747d..c16a08f 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry, F_STRUCT( __field( unsigned long, ip ) __field( unsigned char, type ) - __array( char , ksym_name, KSYM_NAME_LEN ) __array( char , cmd, TASK_COMM_LEN ) + __field( unsigned long, addr ) ), - F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", + F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", (void *)__entry->ip, (unsigned int)__entry->type, - __entry->ksym_name, __entry->cmd) + (void *)__entry->addr, __entry->cmd) ); diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 6d5609c..fea83ee 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -29,7 +29,11 @@ #include "trace_stat.h" #include "trace.h" -/* For now, let us restrict the no. of symbols traced simultaneously to number +#include +#include + +/* + * For now, let us restrict the no. of symbols traced simultaneously to number * of available hardware breakpoint registers. */ #define KSYM_TRACER_MAX HBP_NUM @@ -37,8 +41,10 @@ #define KSYM_TRACER_OP_LEN 3 /* rw- */ struct trace_ksym { - struct hw_breakpoint *ksym_hbp; + struct perf_event **ksym_hbp; unsigned long ksym_addr; + int type; + int len; #ifdef CONFIG_PROFILE_KSYM_TRACER unsigned long counter; #endif @@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) } #endif /* CONFIG_PROFILE_KSYM_TRACER */ -void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) +void ksym_hbp_handler(struct perf_event *hbp, void *data) { struct ring_buffer_event *event; struct ksym_trace_entry *entry; + struct pt_regs *regs = data; struct ring_buffer *buffer; int pc; @@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) entry = ring_buffer_event_data(event); entry->ip = instruction_pointer(regs); - entry->type = hbp->info.type; - strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); + entry->type = hw_breakpoint_type(hbp); + entry->addr = hw_breakpoint_addr(hbp); strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); #ifdef CONFIG_PROFILE_KSYM_TRACER - ksym_collect_stats(hbp->info.address); + ksym_collect_stats(hw_breakpoint_addr(hbp)); #endif /* CONFIG_PROFILE_KSYM_TRACER */ trace_buffer_unlock_commit(buffer, event, 0, pc); @@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str) int access = 0; if (str[0] == 'r') - access += 4; - else if (str[0] != '-') - return -EINVAL; + access |= HW_BREAKPOINT_R; if (str[1] == 'w') - access += 2; - else if (str[1] != '-') - return -EINVAL; + access |= HW_BREAKPOINT_W; - if (str[2] != '-') - return -EINVAL; + if (str[2] == 'x') + access |= HW_BREAKPOINT_X; switch (access) { - case 6: - access = HW_BREAKPOINT_RW; - break; - case 4: - access = -EINVAL; - break; - case 2: - access = HW_BREAKPOINT_WRITE; - break; + case HW_BREAKPOINT_W: + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + return access; + default: + return -EINVAL; } - - return access; } /* @@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (!entry) return -ENOMEM; - entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (!entry->ksym_hbp) - goto err; - - entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); - if (!entry->ksym_hbp->info.name) - goto err; - - entry->ksym_hbp->info.type = op; - entry->ksym_addr = entry->ksym_hbp->info.address = addr; -#ifdef CONFIG_X86 - entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; -#endif - entry->ksym_hbp->triggered = (void *)ksym_hbp_handler; + entry->type = op; + entry->ksym_addr = addr; + entry->len = HW_BREAKPOINT_LEN_4; + + ret = -EAGAIN; + entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, + entry->len, entry->type, + ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) { + entry->ksym_hbp = NULL; + ret = PTR_ERR(entry->ksym_hbp); + } - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret < 0) { + if (!entry->ksym_hbp) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); - ret = -EAGAIN; goto err; } + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); ksym_filter_entry_count++; + return 0; + err: - if (entry->ksym_hbp) - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); + return ret; } @@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); - if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) + ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); + if (entry->type == HW_BREAKPOINT_W) ret = trace_seq_puts(s, "-w-\n"); - else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) + else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) ret = trace_seq_puts(s, "rw-\n"); WARN_ON_ONCE(!ret); } @@ -269,12 +263,10 @@ static void __ksym_trace_reset(void) mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, ksym_hlist) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); + unregister_wide_hw_breakpoint(entry->ksym_hbp); ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); } mutex_unlock(&ksym_tracer_mutex); @@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { if (entry->ksym_addr == ksym_addr) { /* Check for malformed request: (6) */ - if (entry->ksym_hbp->info.type != op) + if (entry->type != op) changed = 1; else goto out; @@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file, } } if (changed) { - unregister_kernel_hw_breakpoint(entry->ksym_hbp); - entry->ksym_hbp->info.type = op; + unregister_wide_hw_breakpoint(entry->ksym_hbp); + entry->type = op; if (op > 0) { - ret = register_kernel_hw_breakpoint(entry->ksym_hbp); - if (ret == 0) + entry->ksym_hbp = + register_wide_hw_breakpoint(entry->ksym_addr, + entry->len, entry->type, + ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) + entry->ksym_hbp = NULL; + if (!entry->ksym_hbp) goto out; } ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); - kfree(entry->ksym_hbp->info.name); - kfree(entry->ksym_hbp); kfree(entry); ret = 0; goto out; @@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, - entry->pid, iter->cpu, field->ksym_name); + ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, + entry->pid, iter->cpu, (char *)field->addr); if (!ret) return TRACE_TYPE_PARTIAL_LINE; switch (field->type) { - case HW_BREAKPOINT_WRITE: + case HW_BREAKPOINT_W: ret = trace_seq_printf(s, " W "); break; - case HW_BREAKPOINT_RW: + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: ret = trace_seq_printf(s, " RW "); break; default: @@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); - if (entry->ksym_hbp) - access_type = entry->ksym_hbp->info.type; + access_type = entry->type; switch (access_type) { - case HW_BREAKPOINT_WRITE: + case HW_BREAKPOINT_W: seq_puts(m, " W "); break; - case HW_BREAKPOINT_RW: + case HW_BREAKPOINT_R | HW_BREAKPOINT_W: seq_puts(m, " RW "); break; default: diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 7179c12..27c5072 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) ksym_selftest_dummy = 0; /* Register the read-write tracing request */ - ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, + HW_BREAKPOINT_R | HW_BREAKPOINT_W, (unsigned long)(&ksym_selftest_dummy)); if (ret < 0) { -- cgit v0.10.2 From ba1c813a6b9a0ef14d7112daf51270eff326f037 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 10 Sep 2009 09:26:21 +0200 Subject: hw-breakpoints: Arbitrate access to pmu following registers constraints Allow or refuse to build a counter using the breakpoints pmu following given constraints. We keep track of the pmu users by using three per cpu variables: - nr_cpu_bp_pinned stores the number of pinned cpu breakpoints counters in the given cpu - nr_bp_flexible stores the number of non-pinned breakpoints counters in the given cpu. - task_bp_pinned stores the number of pinned task breakpoints in a cpu The latter is not a simple counter but gathers the number of tasks that have n pinned breakpoints. Considering HBP_NUM the number of available breakpoint address registers: task_bp_pinned[0] is the number of tasks having 1 breakpoint task_bp_pinned[1] is the number of tasks having 2 breakpoints [...] task_bp_pinned[HBP_NUM - 1] is the number of tasks having the maximum number of registers (HBP_NUM). When a breakpoint counter is created and wants an access to the pmu, we evaluate the following constraints: == Non-pinned counter == - If attached to a single cpu, check: (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM -> If there are already non-pinned counters in this cpu, it means there is already a free slot for them. Otherwise, we check that the maximum number of per task breakpoints (for this cpu) plus the number of per cpu breakpoint (for this cpu) doesn't cover every registers. - If attached to every cpus, check: (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM -> This is roughly the same, except we check the number of per cpu bp for every cpu and we keep the max one. Same for the per tasks breakpoints. == Pinned counter == - If attached to a single cpu, check: ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM -> Same checks as before. But now the nr_bp_flexible, if any, must keep one register at least (or flexible breakpoints will never be be fed). - If attached to every cpus, check: ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) + max(per_cpu(task_bp_pinned, *))) < HBP_NUM Changes in v2: - Counter -> event rename Changes in v5: - Fix unreleased non-pinned task-bound-only counters. We only released it in the first cpu. (Thanks to Paul Mackerras for reporting that) Changes in v6: - Currently, events scheduling are done in this order: cpu context pinned + cpu context non-pinned + task context pinned + task context non-pinned events. Then our current constraints are right theoretically but not in practice, because non-pinned counters may be scheduled before we can apply every possible pinned counters. So consider non-pinned counters as pinned for now. Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 08f6d01..e662dc9 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -16,6 +16,8 @@ * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 * Copyright (C) 2009, Frederic Weisbecker + * + * Thanks to Ingo Molnar for his many suggestions. */ /* @@ -44,24 +46,221 @@ #include #endif -static atomic_t bp_slot; +/* + * Constraints data + */ + +/* Number of pinned cpu breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); -int reserve_bp_slot(struct perf_event *bp) +/* Number of pinned task breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); + +/* Number of non-pinned cpu/task breakpoints in a cpu */ +static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); + +/* Gather the number of total pinned and un-pinned bp in a cpuset */ +struct bp_busy_slots { + unsigned int pinned; + unsigned int flexible; +}; + +/* Serialize accesses to the above constraints */ +static DEFINE_MUTEX(nr_bp_mutex); + +/* + * Report the maximum number of pinned breakpoints a task + * have in this cpu + */ +static unsigned int max_task_bp_pinned(int cpu) { - if (atomic_inc_return(&bp_slot) == HBP_NUM) { - atomic_dec(&bp_slot); + int i; + unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); - return -ENOSPC; + for (i = HBP_NUM -1; i >= 0; i--) { + if (tsk_pinned[i] > 0) + return i + 1; } return 0; } +/* + * Report the number of pinned/un-pinned breakpoints we have in + * a given cpu (cpu > -1) or in all of them (cpu = -1). + */ +static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) +{ + if (cpu >= 0) { + slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); + slots->pinned += max_task_bp_pinned(cpu); + slots->flexible = per_cpu(nr_bp_flexible, cpu); + + return; + } + + for_each_online_cpu(cpu) { + unsigned int nr; + + nr = per_cpu(nr_cpu_bp_pinned, cpu); + nr += max_task_bp_pinned(cpu); + + if (nr > slots->pinned) + slots->pinned = nr; + + nr = per_cpu(nr_bp_flexible, cpu); + + if (nr > slots->flexible) + slots->flexible = nr; + } +} + +/* + * Add a pinned breakpoint for the given task in our constraint table + */ +static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) +{ + int count = 0; + struct perf_event *bp; + struct perf_event_context *ctx = tsk->perf_event_ctxp; + unsigned int *task_bp_pinned; + struct list_head *list; + unsigned long flags; + + if (WARN_ONCE(!ctx, "No perf context for this task")) + return; + + list = &ctx->event_list; + + spin_lock_irqsave(&ctx->lock, flags); + + /* + * The current breakpoint counter is not included in the list + * at the open() callback time + */ + list_for_each_entry(bp, list, event_entry) { + if (bp->attr.type == PERF_TYPE_BREAKPOINT) + count++; + } + + spin_unlock_irqrestore(&ctx->lock, flags); + + if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) + return; + + task_bp_pinned = per_cpu(task_bp_pinned, cpu); + if (enable) { + task_bp_pinned[count]++; + if (count > 0) + task_bp_pinned[count-1]--; + } else { + task_bp_pinned[count]--; + if (count > 0) + task_bp_pinned[count-1]++; + } +} + +/* + * Add/remove the given breakpoint in our constraint table + */ +static void toggle_bp_slot(struct perf_event *bp, bool enable) +{ + int cpu = bp->cpu; + struct task_struct *tsk = bp->ctx->task; + + /* Pinned counter task profiling */ + if (tsk) { + if (cpu >= 0) { + toggle_bp_task_slot(tsk, cpu, enable); + return; + } + + for_each_online_cpu(cpu) + toggle_bp_task_slot(tsk, cpu, enable); + return; + } + + /* Pinned counter cpu profiling */ + if (enable) + per_cpu(nr_cpu_bp_pinned, bp->cpu)++; + else + per_cpu(nr_cpu_bp_pinned, bp->cpu)--; +} + +/* + * Contraints to check before allowing this new breakpoint counter: + * + * == Non-pinned counter == (Considered as pinned for now) + * + * - If attached to a single cpu, check: + * + * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) + * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM + * + * -> If there are already non-pinned counters in this cpu, it means + * there is already a free slot for them. + * Otherwise, we check that the maximum number of per task + * breakpoints (for this cpu) plus the number of per cpu breakpoint + * (for this cpu) doesn't cover every registers. + * + * - If attached to every cpus, check: + * + * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) + * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM + * + * -> This is roughly the same, except we check the number of per cpu + * bp for every cpu and we keep the max one. Same for the per tasks + * breakpoints. + * + * + * == Pinned counter == + * + * - If attached to a single cpu, check: + * + * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) + * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM + * + * -> Same checks as before. But now the nr_bp_flexible, if any, must keep + * one register at least (or they will never be fed). + * + * - If attached to every cpus, check: + * + * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) + * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM + */ +int reserve_bp_slot(struct perf_event *bp) +{ + struct bp_busy_slots slots = {0}; + int ret = 0; + + mutex_lock(&nr_bp_mutex); + + fetch_bp_busy_slots(&slots, bp->cpu); + + /* Flexible counters need to keep at least one slot */ + if (slots.pinned + (!!slots.flexible) == HBP_NUM) { + ret = -ENOSPC; + goto end; + } + + toggle_bp_slot(bp, true); + +end: + mutex_unlock(&nr_bp_mutex); + + return ret; +} + void release_bp_slot(struct perf_event *bp) { - atomic_dec(&bp_slot); + mutex_lock(&nr_bp_mutex); + + toggle_bp_slot(bp, false); + + mutex_unlock(&nr_bp_mutex); } + int __register_perf_hw_breakpoint(struct perf_event *bp) { int ret; -- cgit v0.10.2 From 30ff21e31fe5c8b7b1b7d30cc41e32bc4ee9f175 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 10 Sep 2009 09:35:20 +0800 Subject: ksym_tracer: Remove KSYM_SELFTEST_ENTRY The macro used to be used in both trace_selftest.c and trace_ksym.c, but no longer, so remove it from header file. Signed-off-by: Li Zefan Cc: Prasad Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d72f06f..ee00475 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -367,7 +367,6 @@ int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); int is_tracing_stopped(void); -#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy" extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); extern unsigned long nsecs_to_usecs(unsigned long nsecs); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 27c5072..dc98309 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) ksym_selftest_dummy = 0; /* Register the read-write tracing request */ - ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, + + ret = process_new_ksym_entry("ksym_selftest_dummy", HW_BREAKPOINT_R | HW_BREAKPOINT_W, (unsigned long)(&ksym_selftest_dummy)); -- cgit v0.10.2 From 46dc281b1bb02527195fe2ad50a3af6d7f7f7325 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 8 Nov 2009 18:53:56 +0300 Subject: x86, apic: Use PAGE_SIZE instead of numbers The whole page is reserved for IO-APIC fixmap due to non-cacheable requirement. So lets note this explicitly instead of playing with numbers. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki LKML-Reference: <20091108155356.GB25940@lenovo> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 31e9db3..9ee1c16 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4111,7 +4111,7 @@ fake_ioapic_page: idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res->end = ioapic_phys + PAGE_SIZE-1; ioapic_res++; } } -- cgit v0.10.2 From 4343fe1024e09e17667f95620ed3e69a7a5f4389 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 8 Nov 2009 18:54:31 +0300 Subject: x86, ioapic: Use snrpintf while set names for IO-APIC resourses We should be ready that one day MAX_IO_APICS may raise its number. To prevent memory overwrite we're to use safe snprintf while set IO-APIC resourse name. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20091108155431.GC25940@lenovo> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9ee1c16..24d1458 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4066,7 +4066,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) for (i = 0; i < nr_ioapics; i++) { res[i].name = mem; res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); + snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; } -- cgit v0.10.2 From afe61f677866ffc484e69c4ecca2d316d564d78b Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Sun, 8 Nov 2009 09:01:37 -0600 Subject: perf tools: Add debugfs utility routines for perf Add routines to locate the debugfs mount point and to manage the mounting and unmounting of the debugfs. Signed-off-by: Clark Williams Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra LKML-Reference: <20091101155621.2b3503ee@torg> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 542b29e..b9509b1 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -353,6 +353,7 @@ LIB_H += util/include/asm/swab.h LIB_H += util/include/asm/system.h LIB_H += util/include/asm/uaccess.h LIB_H += perf.h +LIB_H += util/debugfs.h LIB_H += util/event.h LIB_H += util/types.h LIB_H += util/levenshtein.h @@ -378,6 +379,7 @@ LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o LIB_OBJS += util/config.o LIB_OBJS += util/ctype.o +LIB_OBJS += util/debugfs.o LIB_OBJS += util/environment.o LIB_OBJS += util/event.o LIB_OBJS += util/exec_cmd.o diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c new file mode 100644 index 0000000..06b73ee --- /dev/null +++ b/tools/perf/util/debugfs.c @@ -0,0 +1,241 @@ +#include "util.h" +#include "debugfs.h" +#include "cache.h" + +static int debugfs_premounted; +static char debugfs_mountpoint[MAX_PATH+1]; + +static const char *debugfs_known_mountpoints[] = { + "/sys/kernel/debug/", + "/debug/", + 0, +}; + +/* use this to force a umount */ +void debugfs_force_cleanup(void) +{ + debugfs_find_mountpoint(); + debugfs_premounted = 0; + debugfs_umount(); +} + +/* construct a full path to a debugfs element */ +int debugfs_make_path(const char *element, char *buffer, int size) +{ + int len; + + if (strlen(debugfs_mountpoint) == 0) { + buffer[0] = '\0'; + return -1; + } + + len = strlen(debugfs_mountpoint) + strlen(element) + 1; + if (len >= size) + return len+1; + + snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element); + return 0; +} + +static int debugfs_found; + +/* find the path to the mounted debugfs */ +const char *debugfs_find_mountpoint(void) +{ + const char **ptr; + char type[100]; + FILE *fp; + + if (debugfs_found) + return (const char *) debugfs_mountpoint; + + ptr = debugfs_known_mountpoints; + while (*ptr) { + if (debugfs_valid_mountpoint(*ptr) == 0) { + debugfs_found = 1; + strcpy(debugfs_mountpoint, *ptr); + return debugfs_mountpoint; + } + ptr++; + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + die("Can't open /proc/mounts for read"); + + while (fscanf(fp, "%*s %" + STR(MAX_PATH) + "s %99s %*s %*d %*d\n", + debugfs_mountpoint, type) == 2) { + if (strcmp(type, "debugfs") == 0) + break; + } + fclose(fp); + + if (strcmp(type, "debugfs") != 0) + return NULL; + + debugfs_found = 1; + + return debugfs_mountpoint; +} + +/* verify that a mountpoint is actually a debugfs instance */ + +int debugfs_valid_mountpoint(const char *debugfs) +{ + struct statfs st_fs; + + if (statfs(debugfs, &st_fs) < 0) + return -ENOENT; + else if (st_fs.f_type != (long) DEBUGFS_MAGIC) + return -ENOENT; + + return 0; +} + + +int debugfs_valid_entry(const char *path) +{ + struct stat st; + + if (stat(path, &st)) + return -errno; + + return 0; +} + +/* mount the debugfs somewhere */ + +int debugfs_mount(const char *mountpoint) +{ + char mountcmd[128]; + + /* see if it's already mounted */ + if (debugfs_find_mountpoint()) { + debugfs_premounted = 1; + return 0; + } + + /* if not mounted and no argument */ + if (mountpoint == NULL) { + /* see if environment variable set */ + mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT); + /* if no environment variable, use default */ + if (mountpoint == NULL) + mountpoint = "/sys/kernel/debug"; + } + + /* save the mountpoint */ + strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint)); + + /* mount it */ + snprintf(mountcmd, sizeof(mountcmd), + "/bin/mount -t debugfs debugfs %s", mountpoint); + return system(mountcmd); +} + +/* umount the debugfs */ + +int debugfs_umount(void) +{ + char umountcmd[128]; + int ret; + + /* if it was already mounted, leave it */ + if (debugfs_premounted) + return 0; + + /* make sure it's a valid mount point */ + ret = debugfs_valid_mountpoint(debugfs_mountpoint); + if (ret) + return ret; + + snprintf(umountcmd, sizeof(umountcmd), + "/bin/umount %s", debugfs_mountpoint); + return system(umountcmd); +} + +int debugfs_write(const char *entry, const char *value) +{ + char path[MAX_PATH+1]; + int ret, count; + int fd; + + /* construct the path */ + snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); + + /* verify that it exists */ + ret = debugfs_valid_entry(path); + if (ret) + return ret; + + /* get how many chars we're going to write */ + count = strlen(value); + + /* open the debugfs entry */ + fd = open(path, O_RDWR); + if (fd < 0) + return -errno; + + while (count > 0) { + /* write it */ + ret = write(fd, value, count); + if (ret <= 0) { + if (ret == EAGAIN) + continue; + close(fd); + return -errno; + } + count -= ret; + } + + /* close it */ + close(fd); + + /* return success */ + return 0; +} + +/* + * read a debugfs entry + * returns the number of chars read or a negative errno + */ +int debugfs_read(const char *entry, char *buffer, size_t size) +{ + char path[MAX_PATH+1]; + int ret; + int fd; + + /* construct the path */ + snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry); + + /* verify that it exists */ + ret = debugfs_valid_entry(path); + if (ret) + return ret; + + /* open the debugfs entry */ + fd = open(path, O_RDONLY); + if (fd < 0) + return -errno; + + do { + /* read it */ + ret = read(fd, buffer, size); + if (ret == 0) { + close(fd); + return EOF; + } + } while (ret < 0 && errno == EAGAIN); + + /* close it */ + close(fd); + + /* make *sure* there's a null character at the end */ + buffer[ret] = '\0'; + + /* return the number of chars read */ + return ret; +} diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h new file mode 100644 index 0000000..3cd14f9 --- /dev/null +++ b/tools/perf/util/debugfs.h @@ -0,0 +1,25 @@ +#ifndef __DEBUGFS_H__ +#define __DEBUGFS_H__ + +#include + +#ifndef MAX_PATH +# define MAX_PATH 256 +#endif + +#ifndef STR +# define _STR(x) #x +# define STR(x) _STR(x) +#endif + +extern const char *debugfs_find_mountpoint(void); +extern int debugfs_valid_mountpoint(const char *debugfs); +extern int debugfs_valid_entry(const char *path); +extern int debugfs_mount(const char *mountpoint); +extern int debugfs_umount(void); +extern int debugfs_write(const char *entry, const char *value); +extern int debugfs_read(const char *entry, char *buffer, size_t size); +extern void debugfs_force_cleanup(void); +extern int debugfs_make_path(const char *element, char *buffer, int size); + +#endif /* __DEBUGFS_H__ */ -- cgit v0.10.2 From 549104f22b3cd4761145eb5fba6ee4d59822da61 Mon Sep 17 00:00:00 2001 From: Clark Williams Date: Sun, 8 Nov 2009 09:03:07 -0600 Subject: perf tools: Modify perf routines to use new debugfs routines modify perf.c get_debugfs_mntpnt() to use the util/debugfs.c debugfs_find_mountpoint() modify util/parse-events.c to use debugfs_valid_mountpoint(). Signed-off-by: Clark Williams Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra LKML-Reference: <20091101155720.624cc87e@torg> Signed-off-by: Ingo Molnar diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 624e62d..601f403 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -14,6 +14,7 @@ #include "util/run-command.h" #include "util/parse-events.h" #include "util/string.h" +#include "util/debugfs.h" const char perf_usage_string[] = "perf [--version] [--help] COMMAND [ARGS]"; @@ -382,45 +383,12 @@ static int run_argv(int *argcp, const char ***argv) /* mini /proc/mounts parser: searching for "^blah /mount/point debugfs" */ static void get_debugfs_mntpt(void) { - FILE *file; - char fs_type[100]; - char debugfs[MAXPATHLEN]; + const char *path = debugfs_find_mountpoint(); - /* - * try the standard location - */ - if (valid_debugfs_mount("/sys/kernel/debug/") == 0) { - strcpy(debugfs_mntpt, "/sys/kernel/debug/"); - return; - } - - /* - * try the sane location - */ - if (valid_debugfs_mount("/debug/") == 0) { - strcpy(debugfs_mntpt, "/debug/"); - return; - } - - /* - * give up and parse /proc/mounts - */ - file = fopen("/proc/mounts", "r"); - if (file == NULL) - return; - - while (fscanf(file, "%*s %" - STR(MAXPATHLEN) - "s %99s %*s %*d %*d\n", - debugfs, fs_type) == 2) { - if (strcmp(fs_type, "debugfs") == 0) - break; - } - fclose(file); - if (strcmp(fs_type, "debugfs") == 0) { - strncpy(debugfs_mntpt, debugfs, MAXPATHLEN); - debugfs_mntpt[MAXPATHLEN - 1] = '\0'; - } + if (path) + strncpy(debugfs_mntpt, path, sizeof(debugfs_mntpt)); + else + debugfs_mntpt[0] = '\0'; } int main(int argc, const char **argv) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 31baa5a..097938a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -7,6 +7,7 @@ #include "string.h" #include "cache.h" #include "header.h" +#include "debugfs.h" int nr_counters; @@ -149,16 +150,6 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) #define MAX_EVENT_LENGTH 512 -int valid_debugfs_mount(const char *debugfs) -{ - struct statfs st_fs; - - if (statfs(debugfs, &st_fs) < 0) - return -ENOENT; - else if (st_fs.f_type != (long) DEBUGFS_MAGIC) - return -ENOENT; - return 0; -} struct tracepoint_path *tracepoint_id_to_path(u64 config) { @@ -171,7 +162,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (valid_debugfs_mount(debugfs_path)) + if (debugfs_valid_mountpoint(debugfs_path)) return NULL; sys_dir = opendir(debugfs_path); @@ -510,7 +501,7 @@ static enum event_result parse_tracepoint_event(const char **strp, char sys_name[MAX_EVENT_LENGTH]; unsigned int sys_length, evt_length; - if (valid_debugfs_mount(debugfs_path)) + if (debugfs_valid_mountpoint(debugfs_path)) return 0; evt_name = strchr(*strp, ':'); @@ -788,7 +779,7 @@ static void print_tracepoint_events(void) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (valid_debugfs_mount(debugfs_path)) + if (debugfs_valid_mountpoint(debugfs_path)) return; sys_dir = opendir(debugfs_path); -- cgit v0.10.2 From 7d2e8d00b47b973c92db4df7444d5e6d3bb945f9 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 18:22:32 +0200 Subject: pcmcia: use pre-determined values A few PCMCIA network drivers can make use of values provided by the pcmcia core, instead of tedious, independent CIS parsing. xirc32ps_cs.c: manf_id hostap_cs.c: multifunction count b43/pcmcia.c: ConfigBase address and "Present" smc91c92_cs.c: By default, mhz_setup() can use VERS_1 as it is stored in struct pcmcia_device. Only some cards require workarounds, such as reading out VERS_1 twice. CC: David S. Miller CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org Acked-by: John W. Linville Signed-off-by: Dominik Brodowski diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 7bde2cd..af03759 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -545,6 +545,14 @@ static int mhz_setup(struct pcmcia_device *link) u_char *buf, *station_addr; int rc; + /* Read the station address from the CIS. It is stored as the last + (fourth) string in the Version 1 Version/ID tuple. */ + if ((link->prod_id[3]) && + (cvt_ascii_address(dev, link->prod_id[3]) == 0)) + return 0; + + /* Workarounds for broken cards start here. */ + cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); if (!cfg_mem) return -1; @@ -557,8 +565,7 @@ static int mhz_setup(struct pcmcia_device *link) tuple->TupleData = (cisdata_t *)buf; tuple->TupleDataMax = 255; - /* Read the station address from the CIS. It is stored as the last - (fourth) string in the Version 1 Version/ID tuple. */ + /* Ugh -- the EM1144 card has two VERS_1 tuples!?! */ tuple->DesiredTuple = CISTPL_VERS_1; if (first_tuple(link, tuple, parse) != 0) { rc = -1; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index cf84231..5e203230 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -792,13 +792,12 @@ xirc2ps_config(struct pcmcia_device * link) tuple.TupleOffset = 0; /* Is this a valid card */ - tuple.DesiredTuple = CISTPL_MANFID; - if ((err=first_tuple(link, &tuple, &parse))) { + if (link->has_manf_id == 0) { printk(KNOT_XIRC "manfid not found in CIS\n"); goto failure; } - switch(parse.manfid.manf) { + switch (link->manf_id) { case MANFID_XIRCOM: local->manf_str = "Xircom"; break; @@ -822,6 +821,13 @@ xirc2ps_config(struct pcmcia_device * link) } DEBUG(0, "found %s card\n", local->manf_str); + /* needed for the additional fields to be parsed by set_card_type() */ + tuple.DesiredTuple = CISTPL_MANFID; + err = first_tuple(link, &tuple, &parse) + if (err) { + printk(KNOT_XIRC "manfid not found in CIS\n"); + goto failure; + } if (!set_card_type(link, buf)) { printk(KNOT_XIRC "this card is not supported\n"); goto failure; diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index 6c3a749..cd14b7e 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -65,35 +65,15 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) struct ssb_bus *ssb; win_req_t win; memreq_t mem; - tuple_t tuple; - cisparse_t parse; int err = -ENOMEM; int res = 0; - unsigned char buf[64]; ssb = kzalloc(sizeof(*ssb), GFP_KERNEL); if (!ssb) goto out_error; err = -ENODEV; - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - res = pcmcia_get_first_tuple(dev, &tuple); - if (res != 0) - goto err_kfree_ssb; - res = pcmcia_get_tuple_data(dev, &tuple); - if (res != 0) - goto err_kfree_ssb; - res = pcmcia_parse_tuple(&tuple, &parse); - if (res != 0) - goto err_kfree_ssb; - - dev->conf.ConfigBase = parse.config.base; - dev->conf.Present = parse.config.rmask[0]; dev->conf.Attributes = CONF_ENABLE_IRQ; dev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index ad8eab4..31b60dd 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -274,9 +274,6 @@ static int sandisk_enable_wireless(struct net_device *dev) conf_reg_t reg; struct hostap_interface *iface = netdev_priv(dev); local_info_t *local = iface->local; - tuple_t tuple; - cisparse_t *parse = NULL; - u_char buf[64]; struct hostap_cs_priv *hw_priv = local->hw_priv; if (hw_priv->link->io.NumPorts1 < 0x42) { @@ -285,28 +282,13 @@ static int sandisk_enable_wireless(struct net_device *dev) goto done; } - parse = kmalloc(sizeof(cisparse_t), GFP_KERNEL); - if (parse == NULL) { - ret = -ENOMEM; - goto done; - } - - tuple.Attributes = TUPLE_RETURN_COMMON; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - if (hw_priv->link->manf_id != 0xd601 || hw_priv->link->card_id != 0x0101) { /* No SanDisk manfid found */ ret = -ENODEV; goto done; } - tuple.DesiredTuple = CISTPL_LONGLINK_MFC; - if (pcmcia_get_first_tuple(hw_priv->link, &tuple) || - pcmcia_get_tuple_data(hw_priv->link, &tuple) || - pcmcia_parse_tuple(&tuple, parse) || - parse->longlink_mfc.nfn < 2) { + if (hw_priv->link->socket->functions < 2) { /* No multi-function links found */ ret = -ENODEV; goto done; @@ -354,7 +336,6 @@ static int sandisk_enable_wireless(struct net_device *dev) udelay(10); done: - kfree(parse); return ret; } -- cgit v0.10.2 From aaa8cfdada648a6bae32f62df76cc60137a2b323 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 18:28:39 +0200 Subject: pcmcia: use pcmcia_loop_config in misc pcmcia drivers Use pcmcia_loop_config() in a few drivers missed during the first round. On fmvj18x_cs.c it -- strangely -- only requries us to set conf.ConfigIndex, which is done by the core, so include an empty loop function which returns 0 unconditionally. CC: David S. Miller CC: David Sterba CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org For the ipwireless part: Acked-by: Jiri Kosina Acked-by: John W. Linville Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 5216fce..263a18f 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -79,14 +79,32 @@ static void signalled_reboot_callback(void *callback_data) schedule_work(&ipw->work_reboot); } +static int ipwireless_ioprobe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + p_dev->io.BasePort1 = cfg->io.win[0].base; + p_dev->io.NumPorts1 = cfg->io.win[0].len; + p_dev->io.IOAddrLines = 16; + + p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1; + + /* 0x40 causes it to generate level mode interrupts. */ + /* 0x04 enables IREQ pin. */ + p_dev->conf.ConfigIndex = cfg->index | 0x44; + return pcmcia_request_io(p_dev, &p_dev->io); +} + static int config_ipwireless(struct ipw_dev *ipw) { struct pcmcia_device *link = ipw->link; - int ret; + int ret = 0; tuple_t tuple; unsigned short buf[64]; cisparse_t parse; - unsigned short cor_value; memreq_t memreq_attr_memory; memreq_t memreq_common_memory; @@ -97,103 +115,26 @@ static int config_ipwireless(struct ipw_dev *ipw) tuple.TupleDataMax = sizeof(buf); tuple.TupleOffset = 0; - tuple.DesiredTuple = RETURN_FIRST_TUPLE; - - ret = pcmcia_get_first_tuple(link, &tuple); - - while (ret == 0) { - ret = pcmcia_get_tuple_data(link, &tuple); - - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit0; - } - ret = pcmcia_get_next_tuple(link, &tuple); - } - - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - - ret = pcmcia_get_first_tuple(link, &tuple); - - if (ret != 0) { - cs_error(link, GetFirstTuple, ret); - goto exit0; - } - - ret = pcmcia_get_tuple_data(link, &tuple); - - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit0; - } - - ret = pcmcia_parse_tuple(&tuple, &parse); - - if (ret != 0) { - cs_error(link, ParseTuple, ret); - goto exit0; - } - - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - link->io.BasePort1 = parse.cftable_entry.io.win[0].base; - link->io.NumPorts1 = parse.cftable_entry.io.win[0].len; - link->io.IOAddrLines = 16; - - link->irq.IRQInfo1 = parse.cftable_entry.irq.IRQInfo1; - - /* 0x40 causes it to generate level mode interrupts. */ - /* 0x04 enables IREQ pin. */ - cor_value = parse.cftable_entry.index | 0x44; - link->conf.ConfigIndex = cor_value; - - /* IRQ and I/O settings */ - tuple.DesiredTuple = CISTPL_CONFIG; - - ret = pcmcia_get_first_tuple(link, &tuple); - + ret = pcmcia_loop_config(link, ipwireless_ioprobe, NULL); if (ret != 0) { - cs_error(link, GetFirstTuple, ret); - goto exit0; - } - - ret = pcmcia_get_tuple_data(link, &tuple); - - if (ret != 0) { - cs_error(link, GetTupleData, ret); + cs_error(link, RequestIO, ret); goto exit0; } - ret = pcmcia_parse_tuple(&tuple, &parse); - - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit0; - } link->conf.Attributes = CONF_ENABLE_IRQ; - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; link->conf.IntType = INT_MEMORY_AND_IO; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; link->irq.Handler = ipwireless_interrupt; link->irq.Instance = ipw->hardware; - ret = pcmcia_request_io(link, &link->io); - - if (ret != 0) { - cs_error(link, RequestIO, ret); - goto exit0; - } - request_region(link->io.BasePort1, link->io.NumPorts1, IPWIRELESS_PCCARD_NAME); /* memory settings */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; ret = pcmcia_get_first_tuple(link, &tuple); - if (ret != 0) { cs_error(link, GetFirstTuple, ret); goto exit1; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index caf6e4d..429b731 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -575,55 +575,39 @@ static int mgslpc_probe(struct pcmcia_device *link) #define CS_CHECK(fn, ret) \ do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) +static int mgslpc_ioprobe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + if (cfg->io.nwin > 0) { + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(cfg->io.flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(cfg->io.flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = cfg->io.win[0].base; + p_dev->io.NumPorts1 = cfg->io.win[0].len; + return pcmcia_request_io(p_dev, &p_dev->io); + } + return -ENODEV; +} + static int mgslpc_config(struct pcmcia_device *link) { MGSLPC_INFO *info = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_fn, last_ret; - u_char buf[64]; - cistpl_cftable_entry_t dflt = { 0 }; - cistpl_cftable_entry_t *cfg; + int last_fn = RequestIO; + int last_ret; if (debug_level >= DEBUG_LEVEL_INFO) printk("mgslpc_config(0x%p)\n", link); - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - /* get CIS configuration entry */ - - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple)); - - cfg = &(parse.cftable_entry); - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); - CS_CHECK(ParseTuple, pcmcia_parse_tuple(&tuple, &parse)); - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) dflt = *cfg; - if (cfg->index == 0) + last_ret = pcmcia_loop_config(link, mgslpc_ioprobe, NULL); + if (last_ret != 0) goto cs_failed; - link->conf.ConfigIndex = cfg->index; - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); - } - link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 8; diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 7e01fbd..c7a2bbf 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -341,14 +341,23 @@ static int ungermann_try_io_port(struct pcmcia_device *link) return ret; /* RequestIO failed */ } +static int fmvj18x_ioprobe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + return 0; /* strange, but that's what the code did already before... */ +} + + static int fmvj18x_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; local_info_t *lp = netdev_priv(dev); tuple_t tuple; - cisparse_t parse; u_short buf[32]; - int i, last_fn = 0, last_ret = 0, ret; + int i, last_fn = RequestIO, last_ret = 0, ret; unsigned int ioaddr; cardtype_t cardtype; char *card_name = "unknown"; @@ -362,12 +371,11 @@ static int fmvj18x_config(struct pcmcia_device *link) tuple.DesiredTuple = CISTPL_FUNCE; tuple.TupleOffset = 0; if (pcmcia_get_first_tuple(link, &tuple) == 0) { + last_ret = pcmcia_loop_config(link, fmvj18x_ioprobe, NULL); + if (last_ret != 0) + goto cs_failed; + /* Yes, I have CISTPL_FUNCE. Let's check CISTPL_MANFID */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple)); - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); - CS_CHECK(ParseTuple, pcmcia_parse_tuple(&tuple, &parse)); - link->conf.ConfigIndex = parse.cftable_entry.index; switch (link->manf_id) { case MANFID_TDK: cardtype = TDK; diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c index 6238176..cb40c38 100644 --- a/drivers/net/wireless/libertas/if_cs.c +++ b/drivers/net/wireless/libertas/if_cs.c @@ -793,18 +793,37 @@ static void if_cs_release(struct pcmcia_device *p_dev) * configure the card at this point -- we wait until we receive a card * insertion event. */ + +static int if_cs_ioprobe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + p_dev->io.BasePort1 = cfg->io.win[0].base; + p_dev->io.NumPorts1 = cfg->io.win[0].len; + + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + if (cfg->io.nwin != 1) { + lbs_pr_err("wrong CIS (check number of IO windows)\n"); + return -ENODEV; + } + + /* This reserves IO space but doesn't actually enable it */ + return pcmcia_request_io(p_dev, &p_dev->io); +} + static int if_cs_probe(struct pcmcia_device *p_dev) { int ret = -ENOMEM; unsigned int prod_id; struct lbs_private *priv; struct if_cs_card *card; - /* CIS parsing */ - tuple_t tuple; - cisparse_t parse; - cistpl_cftable_entry_t *cfg = &parse.cftable_entry; - cistpl_io_t *io = &cfg->io; - u_char buf[64]; lbs_deb_enter(LBS_DEB_CS); @@ -823,43 +842,11 @@ static int if_cs_probe(struct pcmcia_device *p_dev) p_dev->conf.Attributes = 0; p_dev->conf.IntType = INT_MEMORY_AND_IO; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - if ((ret = pcmcia_get_first_tuple(p_dev, &tuple)) != 0 || - (ret = pcmcia_get_tuple_data(p_dev, &tuple)) != 0 || - (ret = pcmcia_parse_tuple(&tuple, &parse)) != 0) - { - lbs_pr_err("error in pcmcia_get_first_tuple etc\n"); - goto out1; - } - - p_dev->conf.ConfigIndex = cfg->index; - - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1) { - p_dev->conf.Attributes |= CONF_ENABLE_IRQ; - } - - /* IO window settings */ - if (cfg->io.nwin != 1) { - lbs_pr_err("wrong CIS (check number of IO windows)\n"); - ret = -ENODEV; + if (pcmcia_loop_config(p_dev, if_cs_ioprobe, NULL)) { + lbs_pr_err("error in pcmcia_loop_config\n"); goto out1; } - p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - p_dev->io.BasePort1 = io->win[0].base; - p_dev->io.NumPorts1 = io->win[0].len; - /* This reserves IO space but doesn't actually enable it */ - ret = pcmcia_request_io(p_dev, &p_dev->io); - if (ret) { - lbs_pr_err("error in pcmcia_request_io\n"); - goto out1; - } /* * Allocate an interrupt line. Note that this does not assign -- cgit v0.10.2 From af757923a92e6e9dbfdb6b0264be14c564e1c466 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 19:48:39 +0200 Subject: ipwireless: make more use of pcmcia_loop_config() Within the pcmcia_loop_config() callback, we already have all tuple data available we need. Also add a fix to release the IO resource (at least within pcmcia_loop_config() error path). CC: Jiri Kosina CC: David Sterba Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 263a18f..0f46749 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -79,12 +79,18 @@ static void signalled_reboot_callback(void *callback_data) schedule_work(&ipw->work_reboot); } -static int ipwireless_ioprobe(struct pcmcia_device *p_dev, - cistpl_cftable_entry_t *cfg, - cistpl_cftable_entry_t *dflt, - unsigned int vcc, - void *priv_data) +static int ipwireless_probe(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) { + struct ipw_dev *ipw = priv_data; + struct resource *io_resource; + memreq_t memreq_attr_memory; + memreq_t memreq_common_memory; + int ret; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; p_dev->io.BasePort1 = cfg->io.win[0].base; p_dev->io.NumPorts1 = cfg->io.win[0].len; @@ -95,133 +101,118 @@ static int ipwireless_ioprobe(struct pcmcia_device *p_dev, /* 0x40 causes it to generate level mode interrupts. */ /* 0x04 enables IREQ pin. */ p_dev->conf.ConfigIndex = cfg->index | 0x44; - return pcmcia_request_io(p_dev, &p_dev->io); -} + ret = pcmcia_request_io(p_dev, &p_dev->io); + if (ret) + return ret; -static int config_ipwireless(struct ipw_dev *ipw) -{ - struct pcmcia_device *link = ipw->link; - int ret = 0; - tuple_t tuple; - unsigned short buf[64]; - cisparse_t parse; - memreq_t memreq_attr_memory; - memreq_t memreq_common_memory; + io_resource = request_region(p_dev->io.BasePort1, p_dev->io.NumPorts1, + IPWIRELESS_PCCARD_NAME); - ipw->is_v2_card = 0; + if (cfg->mem.nwin == 0) + return 0; - tuple.Attributes = 0; - tuple.TupleData = (cisdata_t *) buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; + ipw->request_common_memory.Attributes = + WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE; + ipw->request_common_memory.Base = cfg->mem.win[0].host_addr; + ipw->request_common_memory.Size = cfg->mem.win[0].len; + if (ipw->request_common_memory.Size < 0x1000) + ipw->request_common_memory.Size = 0x1000; + ipw->request_common_memory.AccessSpeed = 0; + + ret = pcmcia_request_window(&p_dev, &ipw->request_common_memory, + &ipw->handle_common_memory); - ret = pcmcia_loop_config(link, ipwireless_ioprobe, NULL); if (ret != 0) { - cs_error(link, RequestIO, ret); - goto exit0; + cs_error(p_dev, RequestWindow, ret); + goto exit1; } - link->conf.Attributes = CONF_ENABLE_IRQ; - link->conf.IntType = INT_MEMORY_AND_IO; - - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.Handler = ipwireless_interrupt; - link->irq.Instance = ipw->hardware; + memreq_common_memory.CardOffset = cfg->mem.win[0].card_addr; + memreq_common_memory.Page = 0; - request_region(link->io.BasePort1, link->io.NumPorts1, - IPWIRELESS_PCCARD_NAME); - - /* memory settings */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; + ret = pcmcia_map_mem_page(ipw->handle_common_memory, + &memreq_common_memory); - ret = pcmcia_get_first_tuple(link, &tuple); if (ret != 0) { - cs_error(link, GetFirstTuple, ret); - goto exit1; + cs_error(p_dev, MapMemPage, ret); + goto exit2; } - ret = pcmcia_get_tuple_data(link, &tuple); + ipw->is_v2_card = cfg->mem.win[0].len == 0x100; - if (ret != 0) { - cs_error(link, GetTupleData, ret); - goto exit1; - } + ipw->common_memory = ioremap(ipw->request_common_memory.Base, + ipw->request_common_memory.Size); + request_mem_region(ipw->request_common_memory.Base, + ipw->request_common_memory.Size, + IPWIRELESS_PCCARD_NAME); + + ipw->request_attr_memory.Attributes = + WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM | WIN_ENABLE; + ipw->request_attr_memory.Base = 0; + ipw->request_attr_memory.Size = 0; /* this used to be 0x1000 */ + ipw->request_attr_memory.AccessSpeed = 0; - ret = pcmcia_parse_tuple(&tuple, &parse); + ret = pcmcia_request_window(&p_dev, &ipw->request_attr_memory, + &ipw->handle_attr_memory); if (ret != 0) { - cs_error(link, ParseTuple, ret); - goto exit1; + cs_error(p_dev, RequestWindow, ret); + goto exit2; } - if (parse.cftable_entry.mem.nwin > 0) { - ipw->request_common_memory.Attributes = - WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM | WIN_ENABLE; - ipw->request_common_memory.Base = - parse.cftable_entry.mem.win[0].host_addr; - ipw->request_common_memory.Size = parse.cftable_entry.mem.win[0].len; - if (ipw->request_common_memory.Size < 0x1000) - ipw->request_common_memory.Size = 0x1000; - ipw->request_common_memory.AccessSpeed = 0; - - ret = pcmcia_request_window(&link, &ipw->request_common_memory, - &ipw->handle_common_memory); - - if (ret != 0) { - cs_error(link, RequestWindow, ret); - goto exit1; - } + memreq_attr_memory.CardOffset = 0; + memreq_attr_memory.Page = 0; - memreq_common_memory.CardOffset = - parse.cftable_entry.mem.win[0].card_addr; - memreq_common_memory.Page = 0; + ret = pcmcia_map_mem_page(ipw->handle_attr_memory, + &memreq_attr_memory); - ret = pcmcia_map_mem_page(ipw->handle_common_memory, - &memreq_common_memory); + if (ret != 0) { + cs_error(p_dev, MapMemPage, ret); + goto exit3; + } - if (ret != 0) { - cs_error(link, MapMemPage, ret); - goto exit1; - } + ipw->attr_memory = ioremap(ipw->request_attr_memory.Base, + ipw->request_attr_memory.Size); + request_mem_region(ipw->request_attr_memory.Base, + ipw->request_attr_memory.Size, IPWIRELESS_PCCARD_NAME); - ipw->is_v2_card = - parse.cftable_entry.mem.win[0].len == 0x100; + return 0; - ipw->common_memory = ioremap(ipw->request_common_memory.Base, +exit3: + pcmcia_release_window(ipw->handle_attr_memory); +exit2: + if (ipw->common_memory) { + release_mem_region(ipw->request_common_memory.Base, ipw->request_common_memory.Size); - request_mem_region(ipw->request_common_memory.Base, - ipw->request_common_memory.Size, IPWIRELESS_PCCARD_NAME); - - ipw->request_attr_memory.Attributes = - WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_AM | WIN_ENABLE; - ipw->request_attr_memory.Base = 0; - ipw->request_attr_memory.Size = 0; /* this used to be 0x1000 */ - ipw->request_attr_memory.AccessSpeed = 0; - - ret = pcmcia_request_window(&link, &ipw->request_attr_memory, - &ipw->handle_attr_memory); + iounmap(ipw->common_memory); + pcmcia_release_window(ipw->handle_common_memory); + } else + pcmcia_release_window(ipw->handle_common_memory); +exit1: + release_resource(io_resource); + pcmcia_disable_device(p_dev); + return -1; +} - if (ret != 0) { - cs_error(link, RequestWindow, ret); - goto exit2; - } +static int config_ipwireless(struct ipw_dev *ipw) +{ + struct pcmcia_device *link = ipw->link; + int ret = 0; - memreq_attr_memory.CardOffset = 0; - memreq_attr_memory.Page = 0; + ipw->is_v2_card = 0; - ret = pcmcia_map_mem_page(ipw->handle_attr_memory, - &memreq_attr_memory); + ret = pcmcia_loop_config(link, ipwireless_probe, ipw); + if (ret != 0) { + cs_error(link, RequestIO, ret); + return ret; + } - if (ret != 0) { - cs_error(link, MapMemPage, ret); - goto exit2; - } + link->conf.Attributes = CONF_ENABLE_IRQ; + link->conf.IntType = INT_MEMORY_AND_IO; - ipw->attr_memory = ioremap(ipw->request_attr_memory.Base, - ipw->request_attr_memory.Size); - request_mem_region(ipw->request_attr_memory.Base, ipw->request_attr_memory.Size, - IPWIRELESS_PCCARD_NAME); - } + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; + link->irq.Handler = ipwireless_interrupt; + link->irq.Instance = ipw->hardware; INIT_WORK(&ipw->work_reboot, signalled_reboot_work); @@ -234,7 +225,7 @@ static int config_ipwireless(struct ipw_dev *ipw) if (ret != 0) { cs_error(link, RequestIRQ, ret); - goto exit3; + goto exit; } printk(KERN_INFO IPWIRELESS_PCCARD_NAME ": Card type %s\n", @@ -257,12 +248,12 @@ static int config_ipwireless(struct ipw_dev *ipw) ipw->network = ipwireless_network_create(ipw->hardware); if (!ipw->network) - goto exit3; + goto exit; ipw->tty = ipwireless_tty_create(ipw->hardware, ipw->network, ipw->nodes); if (!ipw->tty) - goto exit3; + goto exit; ipwireless_init_hardware_v2_v3(ipw->hardware); @@ -274,33 +265,27 @@ static int config_ipwireless(struct ipw_dev *ipw) if (ret != 0) { cs_error(link, RequestConfiguration, ret); - goto exit4; + goto exit; } link->dev_node = &ipw->nodes[0]; return 0; -exit4: - pcmcia_disable_device(link); -exit3: +exit: if (ipw->attr_memory) { release_mem_region(ipw->request_attr_memory.Base, ipw->request_attr_memory.Size); iounmap(ipw->attr_memory); pcmcia_release_window(ipw->handle_attr_memory); - pcmcia_disable_device(link); } -exit2: if (ipw->common_memory) { release_mem_region(ipw->request_common_memory.Base, ipw->request_common_memory.Size); iounmap(ipw->common_memory); pcmcia_release_window(ipw->handle_common_memory); } -exit1: pcmcia_disable_device(link); -exit0: return -1; } -- cgit v0.10.2 From 91284224da5b15ec6c2b45e10fa5eccd1c92a204 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 23:32:33 +0200 Subject: pcmcia: add new CIS access helpers As a replacement to pcmcia_get_{first,next}_tuple() and pcmcia_get_tuple_data(), three new -- and easier to use -- functions are added: - pcmcia_get_tuple() to get the very first CIS entry of one type. - pcmcia_loop_tuple() to loop over all CIS entries of one type. - pcmcia_get_mac_from_cis() to read out the hardware MAC address from CISTPL_FUNCE. Only a handful of drivers need these functions anyway, as most CIS access is already handled by pcmcia_loop_config(), which now shares the same backed (pccard_loop_tuple()) with pcmcia_loop_tuple(). A pcmcia_get_mac_from_cis() bug noted by Komuro has been fixed in this revision. Signed-off-by: Dominik Brodowski diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index 0599343..adfb83e 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt @@ -1,5 +1,12 @@ This file details changes in 2.6 which affect PCMCIA card driver authors: +* New CIS tuple access (as of 2.6.33) + Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and + pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is + only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is + interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE, + a new helper "pcmcia_get_mac_from_cis()" was added. + * New configuration loop helper (as of 2.6.28) By calling pcmcia_loop_config(), a driver can iterate over all available configuration options. During a driver's probe() phase, one doesn't need diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 6c4a4fc..24dd3c1 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -1482,6 +1482,67 @@ done: } EXPORT_SYMBOL(pccard_read_tuple); + +/** + * pccard_loop_tuple() - loop over tuples in the CIS + * @s: the struct pcmcia_socket where the card is inserted + * @function: the device function we loop for + * @code: which CIS code shall we look for? + * @parse: buffer where the tuple shall be parsed (or NULL, if no parse) + * @priv_data: private data to be passed to the loop_tuple function. + * @loop_tuple: function to call for each CIS entry of type @function. IT + * gets passed the raw tuple, the paresed tuple (if @parse is + * set) and @priv_data. + * + * pccard_loop_tuple() loops over all CIS entries of type @function, and + * calls the @loop_tuple function for each entry. If the call to @loop_tuple + * returns 0, the loop exits. Returns 0 on success or errorcode otherwise. + */ +int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function, + cisdata_t code, cisparse_t *parse, void *priv_data, + int (*loop_tuple) (tuple_t *tuple, + cisparse_t *parse, + void *priv_data)) +{ + tuple_t tuple; + cisdata_t *buf; + int ret; + + buf = kzalloc(256, GFP_KERNEL); + if (buf == NULL) { + dev_printk(KERN_WARNING, &s->dev, "no memory to read tuple\n"); + return -ENOMEM; + } + + tuple.TupleData = buf; + tuple.TupleDataMax = 255; + tuple.TupleOffset = 0; + tuple.DesiredTuple = code; + tuple.Attributes = 0; + + ret = pccard_get_first_tuple(s, function, &tuple); + while (!ret) { + if (pccard_get_tuple_data(s, &tuple)) + goto next_entry; + + if (parse) + if (pcmcia_parse_tuple(&tuple, parse)) + goto next_entry; + + ret = loop_tuple(&tuple, parse, priv_data); + if (!ret) + break; + +next_entry: + ret = pccard_get_next_tuple(s, function, &tuple); + } + + kfree(buf); + return ret; +} +EXPORT_SYMBOL(pccard_loop_tuple); + + /*====================================================================== This tries to determine if a card has a sensible CIS. It returns diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 1f4098f..06a14c9 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -199,6 +199,13 @@ int pcmcia_replace_cis(struct pcmcia_socket *s, const u8 *data, const size_t len); int pccard_validate_cis(struct pcmcia_socket *s, unsigned int *count); +/* loop over CIS entries */ +int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function, + cisdata_t code, cisparse_t *parse, void *priv_data, + int (*loop_tuple) (tuple_t *tuple, + cisparse_t *parse, + void *priv_data)); + /* rsrc_mgr.c */ int pcmcia_validate_mem(struct pcmcia_socket *s); struct resource *pcmcia_find_io_region(unsigned long base, diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index d919e96..0bfb05a 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -885,13 +886,40 @@ EXPORT_SYMBOL(pcmcia_disable_device); struct pcmcia_cfg_mem { - tuple_t tuple; + struct pcmcia_device *p_dev; + void *priv_data; + int (*conf_check) (struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data); cisparse_t parse; - u8 buf[256]; cistpl_cftable_entry_t dflt; }; /** + * pcmcia_do_loop_config() - internal helper for pcmcia_loop_config() + * + * pcmcia_do_loop_config() is the internal callback for the call from + * pcmcia_loop_config() to pccard_loop_tuple(). Data is transferred + * by a struct pcmcia_cfg_mem. + */ +static int pcmcia_do_loop_config(tuple_t *tuple, cisparse_t *parse, void *priv) +{ + cistpl_cftable_entry_t *cfg = &parse->cftable_entry; + struct pcmcia_cfg_mem *cfg_mem = priv; + + /* default values */ + cfg_mem->p_dev->conf.ConfigIndex = cfg->index; + if (cfg->flags & CISTPL_CFTABLE_DEFAULT) + cfg_mem->dflt = *cfg; + + return cfg_mem->conf_check(cfg_mem->p_dev, cfg, &cfg_mem->dflt, + cfg_mem->p_dev->socket->socket.Vcc, + cfg_mem->priv_data); +} + +/** * pcmcia_loop_config() - loop over configuration options * @p_dev: the struct pcmcia_device which we need to loop for. * @conf_check: function to call for each configuration option. @@ -913,48 +941,173 @@ int pcmcia_loop_config(struct pcmcia_device *p_dev, void *priv_data) { struct pcmcia_cfg_mem *cfg_mem; - - tuple_t *tuple; int ret; - unsigned int vcc; cfg_mem = kzalloc(sizeof(struct pcmcia_cfg_mem), GFP_KERNEL); if (cfg_mem == NULL) return -ENOMEM; - /* get the current Vcc setting */ - vcc = p_dev->socket->socket.Vcc; + cfg_mem->p_dev = p_dev; + cfg_mem->conf_check = conf_check; + cfg_mem->priv_data = priv_data; - tuple = &cfg_mem->tuple; - tuple->TupleData = cfg_mem->buf; - tuple->TupleDataMax = 255; - tuple->TupleOffset = 0; - tuple->DesiredTuple = CISTPL_CFTABLE_ENTRY; - tuple->Attributes = 0; + ret = pccard_loop_tuple(p_dev->socket, p_dev->func, + CISTPL_CFTABLE_ENTRY, &cfg_mem->parse, + cfg_mem, pcmcia_do_loop_config); - ret = pcmcia_get_first_tuple(p_dev, tuple); - while (!ret) { - cistpl_cftable_entry_t *cfg = &cfg_mem->parse.cftable_entry; + kfree(cfg_mem); + return ret; +} +EXPORT_SYMBOL(pcmcia_loop_config); - if (pcmcia_get_tuple_data(p_dev, tuple)) - goto next_entry; - if (pcmcia_parse_tuple(tuple, &cfg_mem->parse)) - goto next_entry; +struct pcmcia_loop_mem { + struct pcmcia_device *p_dev; + void *priv_data; + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data); +}; - /* default values */ - p_dev->conf.ConfigIndex = cfg->index; - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - cfg_mem->dflt = *cfg; +/** + * pcmcia_do_loop_tuple() - internal helper for pcmcia_loop_config() + * + * pcmcia_do_loop_tuple() is the internal callback for the call from + * pcmcia_loop_tuple() to pccard_loop_tuple(). Data is transferred + * by a struct pcmcia_cfg_mem. + */ +static int pcmcia_do_loop_tuple(tuple_t *tuple, cisparse_t *parse, void *priv) +{ + struct pcmcia_loop_mem *loop = priv; - ret = conf_check(p_dev, cfg, &cfg_mem->dflt, vcc, priv_data); - if (!ret) - break; + return loop->loop_tuple(loop->p_dev, tuple, loop->priv_data); +}; + +/** + * pcmcia_loop_tuple() - loop over tuples in the CIS + * @p_dev: the struct pcmcia_device which we need to loop for. + * @code: which CIS code shall we look for? + * @priv_data: private data to be passed to the loop_tuple function. + * @loop_tuple: function to call for each CIS entry of type @function. IT + * gets passed the raw tuple and @priv_data. + * + * pcmcia_loop_tuple() loops over all CIS entries of type @function, and + * calls the @loop_tuple function for each entry. If the call to @loop_tuple + * returns 0, the loop exits. Returns 0 on success or errorcode otherwise. + */ +int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code, + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data), + void *priv_data) +{ + struct pcmcia_loop_mem loop = { + .p_dev = p_dev, + .loop_tuple = loop_tuple, + .priv_data = priv_data}; + + return pccard_loop_tuple(p_dev->socket, p_dev->func, code, NULL, + &loop, pcmcia_do_loop_tuple); +}; +EXPORT_SYMBOL(pcmcia_loop_tuple); -next_entry: - ret = pcmcia_get_next_tuple(p_dev, tuple); + +struct pcmcia_loop_get { + size_t len; + cisdata_t **buf; +}; + +/** + * pcmcia_do_get_tuple() - internal helper for pcmcia_get_tuple() + * + * pcmcia_do_get_tuple() is the internal callback for the call from + * pcmcia_get_tuple() to pcmcia_loop_tuple(). As we're only interested in + * the first tuple, return 0 unconditionally. Create a memory buffer large + * enough to hold the content of the tuple, and fill it with the tuple data. + * The caller is responsible to free the buffer. + */ +static int pcmcia_do_get_tuple(struct pcmcia_device *p_dev, tuple_t *tuple, + void *priv) +{ + struct pcmcia_loop_get *get = priv; + + *get->buf = kzalloc(tuple->TupleDataLen, GFP_KERNEL); + if (*get->buf) { + get->len = tuple->TupleDataLen; + memcpy(*get->buf, tuple->TupleData, tuple->TupleDataLen); } + return 0; +}; + +/** + * pcmcia_get_tuple() - get first tuple from CIS + * @p_dev: the struct pcmcia_device which we need to loop for. + * @code: which CIS code shall we look for? + * @buf: pointer to store the buffer to. + * + * pcmcia_get_tuple() gets the content of the first CIS entry of type @code. + * It returns the buffer length (or zero). The caller is responsible to free + * the buffer passed in @buf. + */ +size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code, + unsigned char **buf) +{ + struct pcmcia_loop_get get = { + .len = 0, + .buf = buf, + }; + + *get.buf = NULL; + pcmcia_loop_tuple(p_dev, code, pcmcia_do_get_tuple, &get); + + return get.len; +}; +EXPORT_SYMBOL(pcmcia_get_tuple); + + +/** + * pcmcia_do_get_mac() - internal helper for pcmcia_get_mac_from_cis() + * + * pcmcia_do_get_mac() is the internal callback for the call from + * pcmcia_get_mac_from_cis() to pcmcia_loop_tuple(). We check whether the + * tuple contains a proper LAN_NODE_ID of length 6, and copy the data + * to struct net_device->dev_addr[i]. + */ +static int pcmcia_do_get_mac(struct pcmcia_device *p_dev, tuple_t *tuple, + void *priv) +{ + struct net_device *dev = priv; + int i; + + if (tuple->TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID) + return -EINVAL; + if (tuple->TupleDataLen < ETH_ALEN + 2) { + dev_warn(&p_dev->dev, "Invalid CIS tuple length for " + "LAN_NODE_ID\n"); + return -EINVAL; + } + + if (tuple->TupleData[1] != ETH_ALEN) { + dev_warn(&p_dev->dev, "Invalid header for LAN_NODE_ID\n"); + return -EINVAL; + } + for (i = 0; i < 6; i++) + dev->dev_addr[i] = tuple->TupleData[i+2]; + return 0; +}; + +/** + * pcmcia_get_mac_from_cis() - read out MAC address from CISTPL_FUNCE + * @p_dev: the struct pcmcia_device for which we want the address. + * @dev: a properly prepared struct net_device to store the info to. + * + * pcmcia_get_mac_from_cis() reads out the hardware MAC address from + * CISTPL_FUNCE and stores it into struct net_device *dev->dev_addr which + * must be set up properly by the driver (see examples!). + */ +int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, struct net_device *dev) +{ + return pcmcia_loop_tuple(p_dev, CISTPL_FUNCE, pcmcia_do_get_mac, dev); +}; +EXPORT_SYMBOL(pcmcia_get_mac_from_cis); - return ret; -} -EXPORT_SYMBOL(pcmcia_loop_config); diff --git a/drivers/pcmcia/rsrc_mgr.c b/drivers/pcmcia/rsrc_mgr.c index e592e0e..de0e770 100644 --- a/drivers/pcmcia/rsrc_mgr.c +++ b/drivers/pcmcia/rsrc_mgr.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "cs_internal.h" diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index a2be80b..2eb6e24 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -34,6 +34,7 @@ struct pcmcia_socket; struct pcmcia_device; struct config_t; +struct net_device; /* dynamic device IDs for PCMCIA device drivers. See * Documentation/pcmcia/driver.txt for details. @@ -176,26 +177,39 @@ const char *pcmcia_error_ret(int ret); pcmcia_error_ret(ret)); \ } -/* CIS access. - * Use the pcmcia_* versions in PCMCIA drivers + +/* + * CIS access. + * + * Please use the following functions to access CIS tuples: + * - pcmcia_get_tuple() + * - pcmcia_loop_tuple() + * - pcmcia_get_mac_from_cis() + * + * To parse a tuple_t, pcmcia_parse_tuple() exists. Its interface + * might change in future. */ -int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); -int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_first_tuple(p_dev, tuple) \ - pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) +/* get the very first CIS entry of type @code. Note that buf is pointer + * to u8 *buf; and that you need to kfree(buf) afterwards. */ +size_t pcmcia_get_tuple(struct pcmcia_device *p_dev, cisdata_t code, + u8 **buf); -int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_next_tuple(p_dev, tuple) \ - pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) +/* loop over CIS entries */ +int pcmcia_loop_tuple(struct pcmcia_device *p_dev, cisdata_t code, + int (*loop_tuple) (struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data), + void *priv_data); -int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); -#define pcmcia_get_tuple_data(p_dev, tuple) \ - pccard_get_tuple_data(p_dev->socket, tuple) +/* get the MAC address from CISTPL_FUNCE */ +int pcmcia_get_mac_from_cis(struct pcmcia_device *p_dev, + struct net_device *dev); +/* parse a tuple_t */ +int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse); + /* loop CIS entries for valid configuration */ int pcmcia_loop_config(struct pcmcia_device *p_dev, int (*conf_check) (struct pcmcia_device *p_dev, @@ -215,6 +229,21 @@ int pcmcia_reset_card(struct pcmcia_socket *skt); int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, conf_reg_t *reg); +/* deprecated -- do not use in drivers. */ +int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); +#define pcmcia_get_first_tuple(p_dev, tuple) \ + pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) + +int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); +#define pcmcia_get_next_tuple(p_dev, tuple) \ + pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) + +int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); +#define pcmcia_get_tuple_data(p_dev, tuple) \ + pccard_get_tuple_data(p_dev->socket, tuple) + /* device configuration */ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req); int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); -- cgit v0.10.2 From dddfbd824b96a25da0b2f1cf35c0be33ef2422fe Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 23:54:24 +0200 Subject: pcmcia: convert net pcmcia drivers to use new CIS helpers Use the new CIS helpers in net pcmcia drivers, which allows for a few code cleanups. This revision does not remove the phys_addr assignment in 3c589_cs.c -- a bug noted by Komuro CC: David S. Miller CC: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index b58965a..6449290 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -344,13 +344,13 @@ static int tc574_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct el3_private *lp = netdev_priv(dev); - tuple_t tuple; - __le16 buf[32]; int last_fn, last_ret, i, j; unsigned int ioaddr; __be16 *phys_addr; char *cardname; __u32 config; + u8 *buf; + size_t len; phys_addr = (__be16 *)dev->dev_addr; @@ -378,16 +378,14 @@ static int tc574_config(struct pcmcia_device *link) /* The 3c574 normally uses an EEPROM for configuration info, including the hardware address. The future products may include a modem chip and put the address in the CIS. */ - tuple.Attributes = 0; - tuple.TupleData = (cisdata_t *)buf; - tuple.TupleDataMax = 64; - tuple.TupleOffset = 0; - tuple.DesiredTuple = 0x88; - if (pcmcia_get_first_tuple(link, &tuple) == 0) { - pcmcia_get_tuple_data(link, &tuple); + + len = pcmcia_get_tuple(link, 0x88, &buf); + if (buf && len >= 6) { for (i = 0; i < 3; i++) - phys_addr[i] = htons(le16_to_cpu(buf[i])); + phys_addr[i] = htons(le16_to_cpu(buf[i * 2])); + kfree(buf); } else { + kfree(buf); /* 0 < len < 6 */ EL3WINDOW(0); for (i = 0; i < 3; i++) phys_addr[i] = htons(read_eeprom(ioaddr, i + 10)); diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index 569fb06..fc6123e 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -256,22 +256,16 @@ static int tc589_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct el3_private *lp = netdev_priv(dev); - tuple_t tuple; - __le16 buf[32]; __be16 *phys_addr; int last_fn, last_ret, i, j, multi = 0, fifo; unsigned int ioaddr; char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; + u8 *buf; + size_t len; DEBUG(0, "3c589_config(0x%p)\n", link); phys_addr = (__be16 *)dev->dev_addr; - tuple.Attributes = 0; - tuple.TupleData = (cisdata_t *)buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - tuple.Attributes = TUPLE_RETURN_COMMON; - /* Is this a 3c562? */ if (link->manf_id != MANFID_3COM) printk(KERN_INFO "3c589_cs: hmmm, is this really a " @@ -301,12 +295,13 @@ static int tc589_config(struct pcmcia_device *link) /* The 3c589 has an extra EEPROM for configuration info, including the hardware address. The 3c562 puts the address in the CIS. */ - tuple.DesiredTuple = 0x88; - if (pcmcia_get_first_tuple(link, &tuple) == 0) { - pcmcia_get_tuple_data(link, &tuple); - for (i = 0; i < 3; i++) - phys_addr[i] = htons(le16_to_cpu(buf[i])); + len = pcmcia_get_tuple(link, 0x88, &buf); + if (buf && len >= 6) { + for (i = 0; i < 3; i++) + phys_addr[i] = htons(le16_to_cpu(buf[i*2])); + kfree(buf); } else { + kfree(buf); /* 0 < len < 6 */ for (i = 0; i < 3; i++) phys_addr[i] = htons(read_eeprom(ioaddr, i)); if (phys_addr[0] == htons(0x6060)) { diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index c7a2bbf..5b73b11 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -350,32 +350,29 @@ static int fmvj18x_ioprobe(struct pcmcia_device *p_dev, return 0; /* strange, but that's what the code did already before... */ } - static int fmvj18x_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; local_info_t *lp = netdev_priv(dev); - tuple_t tuple; - u_short buf[32]; int i, last_fn = RequestIO, last_ret = 0, ret; unsigned int ioaddr; cardtype_t cardtype; char *card_name = "unknown"; - u_char *node_id; + u8 *buf; + size_t len; + u_char buggybuf[32]; DEBUG(0, "fmvj18x_config(0x%p)\n", link); - tuple.TupleData = (u_char *)buf; - tuple.TupleDataMax = 64; - tuple.TupleOffset = 0; - tuple.DesiredTuple = CISTPL_FUNCE; - tuple.TupleOffset = 0; - if (pcmcia_get_first_tuple(link, &tuple) == 0) { + len = pcmcia_get_tuple(link, CISTPL_FUNCE, &buf); + kfree(buf); + + if (len) { + /* Yes, I have CISTPL_FUNCE. Let's check CISTPL_MANFID */ last_ret = pcmcia_loop_config(link, fmvj18x_ioprobe, NULL); if (last_ret != 0) goto cs_failed; - /* Yes, I have CISTPL_FUNCE. Let's check CISTPL_MANFID */ switch (link->manf_id) { case MANFID_TDK: cardtype = TDK; @@ -482,21 +479,21 @@ static int fmvj18x_config(struct pcmcia_device *link) case CONTEC: case NEC: case KME: - tuple.DesiredTuple = CISTPL_FUNCE; - tuple.TupleOffset = 0; - CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple)); - tuple.TupleOffset = 0; - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); if (cardtype == MBH10304) { - /* MBH10304's CIS_FUNCE is corrupted */ - node_id = &(tuple.TupleData[5]); card_name = "FMV-J182"; - } else { - while (tuple.TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID ) { - CS_CHECK(GetNextTuple, pcmcia_get_next_tuple(link, &tuple)); - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); + + len = pcmcia_get_tuple(link, CISTPL_FUNCE, &buf); + if (len < 11) { + kfree(buf); + goto failed; } - node_id = &(tuple.TupleData[2]); + /* Read MACID from CIS */ + for (i = 5; i < 11; i++) + dev->dev_addr[i] = buf[i]; + kfree(buf); + } else { + if (pcmcia_get_mac_from_cis(link, dev)) + goto failed; if( cardtype == TDK ) { card_name = "TDK LAK-CD021"; } else if( cardtype == LA501 ) { @@ -509,9 +506,6 @@ static int fmvj18x_config(struct pcmcia_device *link) card_name = "C-NET(PC)C"; } } - /* Read MACID from CIS */ - for (i = 0; i < 6; i++) - dev->dev_addr[i] = node_id[i]; break; case UNGERMANN: /* Read MACID from register */ @@ -521,12 +515,12 @@ static int fmvj18x_config(struct pcmcia_device *link) break; case XXX10304: /* Read MACID from Buggy CIS */ - if (fmvj18x_get_hwinfo(link, tuple.TupleData) == -1) { + if (fmvj18x_get_hwinfo(link, buggybuf) == -1) { printk(KERN_NOTICE "fmvj18x_cs: unable to read hardware net address.\n"); goto failed; } for (i = 0 ; i < 6; i++) { - dev->dev_addr[i] = tuple.TupleData[i]; + dev->dev_addr[i] = buggybuf[i]; } card_name = "FMV-J182"; break; diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index 5ed6339..4b96b35 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -661,8 +661,8 @@ static int nmclan_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; mace_private *lp = netdev_priv(dev); - tuple_t tuple; - u_char buf[64]; + u8 *buf; + size_t len; int i, last_ret, last_fn; unsigned int ioaddr; @@ -677,14 +677,13 @@ static int nmclan_config(struct pcmcia_device *link) ioaddr = dev->base_addr; /* Read the ethernet address from the CIS. */ - tuple.DesiredTuple = 0x80 /* CISTPL_CFTABLE_ENTRY_MISC */; - tuple.TupleData = buf; - tuple.TupleDataMax = 64; - tuple.TupleOffset = 0; - tuple.Attributes = 0; - CS_CHECK(GetFirstTuple, pcmcia_get_first_tuple(link, &tuple)); - CS_CHECK(GetTupleData, pcmcia_get_tuple_data(link, &tuple)); - memcpy(dev->dev_addr, tuple.TupleData, ETHER_ADDR_LEN); + len = pcmcia_get_tuple(link, 0x80, &buf); + if (!buf || len < ETHER_ADDR_LEN) { + kfree(buf); + goto failed; + } + memcpy(dev->dev_addr, buf, ETHER_ADDR_LEN); + kfree(buf); /* Verify configuration by reading the MACE ID. */ { diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index af03759..df92bcd 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -126,12 +126,6 @@ struct smc_private { int rx_ovrn; }; -struct smc_cfg_mem { - tuple_t tuple; - cisparse_t parse; - u_char buf[255]; -}; - /* Special definitions for Megahertz multifunction cards */ #define MEGAHERTZ_ISR 0x0380 @@ -408,34 +402,7 @@ static int cvt_ascii_address(struct net_device *dev, char *s) return 0; } -/*====================================================================*/ - -static int first_tuple(struct pcmcia_device *handle, tuple_t *tuple, - cisparse_t *parse) -{ - int i; - - i = pcmcia_get_first_tuple(handle, tuple); - if (i != 0) - return i; - i = pcmcia_get_tuple_data(handle, tuple); - if (i != 0) - return i; - return pcmcia_parse_tuple(tuple, parse); -} - -static int next_tuple(struct pcmcia_device *handle, tuple_t *tuple, - cisparse_t *parse) -{ - int i; - - if ((i = pcmcia_get_next_tuple(handle, tuple)) != 0 || - (i = pcmcia_get_tuple_data(handle, tuple)) != 0) - return i; - return pcmcia_parse_tuple(tuple, parse); -} - -/*====================================================================== +/*==================================================================== Configuration stuff for Megahertz cards @@ -490,15 +457,10 @@ static int mhz_mfc_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct smc_private *smc = netdev_priv(dev); - struct smc_cfg_mem *cfg_mem; win_req_t req; memreq_t mem; int i; - cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - return -ENOMEM; - link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; link->irq.Attributes = @@ -510,7 +472,8 @@ static int mhz_mfc_config(struct pcmcia_device *link) /* The Megahertz combo cards have modem-like CIS entries, so we have to explicitly try a bunch of port combinations. */ if (pcmcia_loop_config(link, mhz_mfc_config_check, NULL)) - goto free_cfg_mem; + return -ENODEV; + dev->base_addr = link->io.BasePort1; /* Allocate a memory window, for accessing the ISR */ @@ -519,7 +482,8 @@ static int mhz_mfc_config(struct pcmcia_device *link) req.AccessSpeed = 0; i = pcmcia_request_window(&link, &req, &link->win); if (i != 0) - goto free_cfg_mem; + return -ENODEV; + smc->base = ioremap(req.Base, req.Size); mem.CardOffset = mem.Page = 0; if (smc->manfid == MANFID_MOTOROLA) @@ -531,18 +495,32 @@ static int mhz_mfc_config(struct pcmcia_device *link) && (smc->cardid == PRODID_MEGAHERTZ_EM3288)) mhz_3288_power(link); -free_cfg_mem: - kfree(cfg_mem); - return -ENODEV; + return 0; } +static int pcmcia_get_versmac(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) +{ + struct net_device *dev = priv; + cisparse_t parse; + + if (pcmcia_parse_tuple(tuple, &parse)) + return -EINVAL; + + if ((parse.version_1.ns > 3) && + (cvt_ascii_address(dev, + (parse.version_1.str + parse.version_1.ofs[3])))) + return 0; + + return -EINVAL; +}; + static int mhz_setup(struct pcmcia_device *link) { struct net_device *dev = link->priv; - struct smc_cfg_mem *cfg_mem; - tuple_t *tuple; - cisparse_t *parse; - u_char *buf, *station_addr; + size_t len; + u8 *buf; int rc; /* Read the station address from the CIS. It is stored as the last @@ -552,56 +530,22 @@ static int mhz_setup(struct pcmcia_device *link) return 0; /* Workarounds for broken cards start here. */ - - cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - return -1; - - tuple = &cfg_mem->tuple; - parse = &cfg_mem->parse; - buf = cfg_mem->buf; - - tuple->Attributes = tuple->TupleOffset = 0; - tuple->TupleData = (cisdata_t *)buf; - tuple->TupleDataMax = 255; - /* Ugh -- the EM1144 card has two VERS_1 tuples!?! */ - tuple->DesiredTuple = CISTPL_VERS_1; - if (first_tuple(link, tuple, parse) != 0) { - rc = -1; - goto free_cfg_mem; - } - /* Ugh -- the EM1144 card has two VERS_1 tuples!?! */ - if (next_tuple(link, tuple, parse) != 0) - first_tuple(link, tuple, parse); - if (parse->version_1.ns > 3) { - station_addr = parse->version_1.str + parse->version_1.ofs[3]; - if (cvt_ascii_address(dev, station_addr) == 0) { - rc = 0; - goto free_cfg_mem; - } - } + if (!pcmcia_loop_tuple(link, CISTPL_VERS_1, pcmcia_get_versmac, dev)) + return 0; /* Another possibility: for the EM3288, in a special tuple */ - tuple->DesiredTuple = 0x81; - if (pcmcia_get_first_tuple(link, tuple) != 0) { - rc = -1; - goto free_cfg_mem; - } - if (pcmcia_get_tuple_data(link, tuple) != 0) { - rc = -1; - goto free_cfg_mem; - } - buf[12] = '\0'; - if (cvt_ascii_address(dev, buf) == 0) { - rc = 0; - goto free_cfg_mem; - } rc = -1; -free_cfg_mem: - kfree(cfg_mem); - return rc; -} + len = pcmcia_get_tuple(link, 0x81, &buf); + if (buf && len >= 13) { + buf[12] = '\0'; + if (cvt_ascii_address(dev, buf)) + rc = 0; + } + kfree(buf); + + return rc; +}; /*====================================================================== @@ -691,58 +635,21 @@ static int smc_config(struct pcmcia_device *link) return i; } + static int smc_setup(struct pcmcia_device *link) { struct net_device *dev = link->priv; - struct smc_cfg_mem *cfg_mem; - tuple_t *tuple; - cisparse_t *parse; - cistpl_lan_node_id_t *node_id; - u_char *buf, *station_addr; - int i, rc; - - cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - return -ENOMEM; - - tuple = &cfg_mem->tuple; - parse = &cfg_mem->parse; - buf = cfg_mem->buf; - - tuple->Attributes = tuple->TupleOffset = 0; - tuple->TupleData = (cisdata_t *)buf; - tuple->TupleDataMax = 255; /* Check for a LAN function extension tuple */ - tuple->DesiredTuple = CISTPL_FUNCE; - i = first_tuple(link, tuple, parse); - while (i == 0) { - if (parse->funce.type == CISTPL_FUNCE_LAN_NODE_ID) - break; - i = next_tuple(link, tuple, parse); - } - if (i == 0) { - node_id = (cistpl_lan_node_id_t *)parse->funce.data; - if (node_id->nb == 6) { - for (i = 0; i < 6; i++) - dev->dev_addr[i] = node_id->id[i]; - rc = 0; - goto free_cfg_mem; - } - } + if (!pcmcia_get_mac_from_cis(link, dev)) + return 0; + /* Try the third string in the Version 1 Version/ID tuple. */ if (link->prod_id[2]) { - station_addr = link->prod_id[2]; - if (cvt_ascii_address(dev, station_addr) == 0) { - rc = 0; - goto free_cfg_mem; - } + if (cvt_ascii_address(dev, link->prod_id[2]) == 0) + return 0; } - - rc = -1; -free_cfg_mem: - kfree(cfg_mem); - return rc; + return -1; } /*====================================================================*/ @@ -801,41 +708,31 @@ static int osi_load_firmware(struct pcmcia_device *link) return err; } -static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid) +static int pcmcia_osi_mac(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) { - struct net_device *dev = link->priv; - struct smc_cfg_mem *cfg_mem; - tuple_t *tuple; - u_char *buf; - int i, rc; + struct net_device *dev = priv; + int i; - cfg_mem = kmalloc(sizeof(struct smc_cfg_mem), GFP_KERNEL); - if (!cfg_mem) - return -1; + if (tuple->TupleDataLen < 8) + return -EINVAL; + if (tuple->TupleData[0] != 0x04) + return -EINVAL; + for (i = 0; i < 6; i++) + dev->dev_addr[i] = tuple->TupleData[i+2]; + return 0; +}; - tuple = &cfg_mem->tuple; - buf = cfg_mem->buf; - tuple->Attributes = TUPLE_RETURN_COMMON; - tuple->TupleData = (cisdata_t *)buf; - tuple->TupleDataMax = 255; - tuple->TupleOffset = 0; +static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid) +{ + struct net_device *dev = link->priv; + int rc; /* Read the station address from tuple 0x90, subtuple 0x04 */ - tuple->DesiredTuple = 0x90; - i = pcmcia_get_first_tuple(link, tuple); - while (i == 0) { - i = pcmcia_get_tuple_data(link, tuple); - if ((i != 0) || (buf[0] == 0x04)) - break; - i = pcmcia_get_next_tuple(link, tuple); - } - if (i != 0) { - rc = -1; - goto free_cfg_mem; - } - for (i = 0; i < 6; i++) - dev->dev_addr[i] = buf[i+2]; + if (pcmcia_loop_tuple(link, 0x90, pcmcia_osi_mac, dev)) + return -1; if (((manfid == MANFID_OSITECH) && (cardid == PRODID_OSITECH_SEVEN)) || @@ -843,7 +740,7 @@ static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid) (cardid == PRODID_PSION_NET100))) { rc = osi_load_firmware(link); if (rc) - goto free_cfg_mem; + return rc; } else if (manfid == MANFID_OSITECH) { /* Make sure both functions are powered up */ set_bits(0x300, link->io.BasePort1 + OSITECH_AUI_PWR); @@ -853,10 +750,7 @@ static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid) inw(link->io.BasePort1 + OSITECH_AUI_PWR), inw(link->io.BasePort1 + OSITECH_RESET_ISR)); } - rc = 0; -free_cfg_mem: - kfree(cfg_mem); - return rc; + return 0; } static int smc91c92_suspend(struct pcmcia_device *link) diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 5e203230..925e850 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -359,7 +359,7 @@ static void xirc_tx_timeout(struct net_device *dev); static void xirc2ps_tx_timeout_task(struct work_struct *work); static void set_addresses(struct net_device *dev); static void set_multicast_list(struct net_device *dev); -static int set_card_type(struct pcmcia_device *link, const void *s); +static int set_card_type(struct pcmcia_device *link); static int do_config(struct net_device *dev, struct ifmap *map); static int do_open(struct net_device *dev); static int do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); @@ -371,28 +371,6 @@ static void do_powerdown(struct net_device *dev); static int do_stop(struct net_device *dev); /*=============== Helper functions =========================*/ -static int -first_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t *parse) -{ - int err; - - if ((err = pcmcia_get_first_tuple(handle, tuple)) == 0 && - (err = pcmcia_get_tuple_data(handle, tuple)) == 0) - err = pcmcia_parse_tuple(tuple, parse); - return err; -} - -static int -next_tuple(struct pcmcia_device *handle, tuple_t *tuple, cisparse_t *parse) -{ - int err; - - if ((err = pcmcia_get_next_tuple(handle, tuple)) == 0 && - (err = pcmcia_get_tuple_data(handle, tuple)) == 0) - err = pcmcia_parse_tuple(tuple, parse); - return err; -} - #define SelectPage(pgnr) outb((pgnr), ioaddr + XIRCREG_PR) #define GetByte(reg) ((unsigned)inb(ioaddr + (reg))) #define GetWord(reg) ((unsigned)inw(ioaddr + (reg))) @@ -644,15 +622,23 @@ xirc2ps_detach(struct pcmcia_device *link) * */ static int -set_card_type(struct pcmcia_device *link, const void *s) +set_card_type(struct pcmcia_device *link) { struct net_device *dev = link->priv; local_info_t *local = netdev_priv(dev); - #ifdef PCMCIA_DEBUG - unsigned cisrev = ((const unsigned char *)s)[2]; - #endif - unsigned mediaid= ((const unsigned char *)s)[3]; - unsigned prodid = ((const unsigned char *)s)[4]; + u8 *buf; + unsigned int cisrev, mediaid, prodid; + size_t len; + + len = pcmcia_get_tuple(link, CISTPL_MANFID, &buf); + if (len < 5) { + dev_err(&link->dev, "invalid CIS -- sorry\n"); + return 0; + } + + cisrev = buf[2]; + mediaid = buf[3]; + prodid = buf[4]; DEBUG(0, "cisrev=%02x mediaid=%02x prodid=%02x\n", cisrev, mediaid, prodid); @@ -761,6 +747,26 @@ xirc2ps_config_check(struct pcmcia_device *p_dev, } + +static int pcmcia_get_mac_ce(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) +{ + struct net_device *dev = priv; + int i; + + if (tuple->TupleDataLen != 13) + return -EINVAL; + if ((tuple->TupleData[0] != 2) || (tuple->TupleData[1] != 1) || + (tuple->TupleData[2] != 6)) + return -EINVAL; + /* another try (James Lehmer's CE2 version 4.1)*/ + for (i = 2; i < 6; i++) + dev->dev_addr[i] = tuple->TupleData[i+2]; + return 0; +}; + + /**************** * xirc2ps_config() is scheduled to run after a CARD_INSERTION event * is received, to configure the PCMCIA socket, and to make the @@ -772,25 +778,14 @@ xirc2ps_config(struct pcmcia_device * link) struct net_device *dev = link->priv; local_info_t *local = netdev_priv(dev); unsigned int ioaddr; - tuple_t tuple; - cisparse_t parse; - int err, i; - u_char buf[64]; - cistpl_lan_node_id_t *node_id = (cistpl_lan_node_id_t*)parse.funce.data; + int err; + u8 *buf; + size_t len; local->dingo_ccr = NULL; DEBUG(0, "config(0x%p)\n", link); - /* - * This reads the card's CONFIG tuple to find its configuration - * registers. - */ - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = 64; - tuple.TupleOffset = 0; - /* Is this a valid card */ if (link->has_manf_id == 0) { printk(KNOT_XIRC "manfid not found in CIS\n"); @@ -816,67 +811,41 @@ xirc2ps_config(struct pcmcia_device * link) break; default: printk(KNOT_XIRC "Unknown Card Manufacturer ID: 0x%04x\n", - (unsigned)parse.manfid.manf); + (unsigned)link->manf_id); goto failure; } DEBUG(0, "found %s card\n", local->manf_str); - /* needed for the additional fields to be parsed by set_card_type() */ - tuple.DesiredTuple = CISTPL_MANFID; - err = first_tuple(link, &tuple, &parse) - if (err) { - printk(KNOT_XIRC "manfid not found in CIS\n"); - goto failure; - } - if (!set_card_type(link, buf)) { + if (!set_card_type(link)) { printk(KNOT_XIRC "this card is not supported\n"); goto failure; } /* get the ethernet address from the CIS */ - tuple.DesiredTuple = CISTPL_FUNCE; - for (err = first_tuple(link, &tuple, &parse); !err; - err = next_tuple(link, &tuple, &parse)) { - /* Once I saw two CISTPL_FUNCE_LAN_NODE_ID entries: - * the first one with a length of zero the second correct - - * so I skip all entries with length 0 */ - if (parse.funce.type == CISTPL_FUNCE_LAN_NODE_ID - && ((cistpl_lan_node_id_t *)parse.funce.data)->nb) - break; - } - if (err) { /* not found: try to get the node-id from tuple 0x89 */ - tuple.DesiredTuple = 0x89; /* data layout looks like tuple 0x22 */ - if ((err = pcmcia_get_first_tuple(link, &tuple)) == 0 && - (err = pcmcia_get_tuple_data(link, &tuple)) == 0) { - if (tuple.TupleDataLen == 8 && *buf == CISTPL_FUNCE_LAN_NODE_ID) - memcpy(&parse, buf, 8); - else - err = -1; - } - } - if (err) { /* another try (James Lehmer's CE2 version 4.1)*/ - tuple.DesiredTuple = CISTPL_FUNCE; - for (err = first_tuple(link, &tuple, &parse); !err; - err = next_tuple(link, &tuple, &parse)) { - if (parse.funce.type == 0x02 && parse.funce.data[0] == 1 - && parse.funce.data[1] == 6 && tuple.TupleDataLen == 13) { - buf[1] = 4; - memcpy(&parse, buf+1, 8); - break; + err = pcmcia_get_mac_from_cis(link, dev); + + /* not found: try to get the node-id from tuple 0x89 */ + if (err) { + len = pcmcia_get_tuple(link, 0x89, &buf); + /* data layout looks like tuple 0x22 */ + if (buf && len == 8) { + if (*buf == CISTPL_FUNCE_LAN_NODE_ID) { + int i; + for (i = 2; i < 6; i++) + dev->dev_addr[i] = buf[i+2]; + } else + err = -1; } - } + kfree(buf); } + + if (err) + err = pcmcia_loop_tuple(link, CISTPL_FUNCE, pcmcia_get_mac_ce, dev); + if (err) { printk(KNOT_XIRC "node-id not found in CIS\n"); goto failure; } - node_id = (cistpl_lan_node_id_t *)parse.funce.data; - if (node_id->nb != 6) { - printk(KNOT_XIRC "malformed node-id in CIS\n"); - goto failure; - } - for (i=0; i < 6; i++) - dev->dev_addr[i] = node_id->id[i]; link->io.IOAddrLines =10; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; -- cgit v0.10.2 From 37ace3d4131ae80f370eb1230fa7db2b3eedf17c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 23:56:41 +0200 Subject: pcmcia: convert ssb pcmcia driver to use new CIS helpers SSB is a prime example of how to make use of the new CIS helpers. CC: Michael Buesch Acked-by: John W. Linville Signed-off-by: Dominik Brodowski diff --git a/drivers/ssb/pcmcia.c b/drivers/ssb/pcmcia.c index 100e7a5..e72f404 100644 --- a/drivers/ssb/pcmcia.c +++ b/drivers/ssb/pcmcia.c @@ -617,136 +617,140 @@ static int ssb_pcmcia_sprom_check_crc(const u16 *sprom, size_t size) } \ } while (0) -int ssb_pcmcia_get_invariants(struct ssb_bus *bus, - struct ssb_init_invariants *iv) +static int ssb_pcmcia_get_mac(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) { - tuple_t tuple; - int res; - unsigned char buf[32]; + struct ssb_sprom *sprom = priv; + + if (tuple->TupleData[0] != CISTPL_FUNCE_LAN_NODE_ID) + return -EINVAL; + if (tuple->TupleDataLen != ETH_ALEN + 2) + return -EINVAL; + if (tuple->TupleData[1] != ETH_ALEN) + return -EINVAL; + memcpy(sprom->il0mac, &tuple->TupleData[2], ETH_ALEN); + return 0; +}; + +static int ssb_pcmcia_do_get_invariants(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv) +{ + struct ssb_init_invariants *iv = priv; struct ssb_sprom *sprom = &iv->sprom; struct ssb_boardinfo *bi = &iv->boardinfo; const char *error_description; + GOTO_ERROR_ON(tuple->TupleDataLen < 1, "VEN tpl < 1"); + switch (tuple->TupleData[0]) { + case SSB_PCMCIA_CIS_ID: + GOTO_ERROR_ON((tuple->TupleDataLen != 5) && + (tuple->TupleDataLen != 7), + "id tpl size"); + bi->vendor = tuple->TupleData[1] | + ((u16)tuple->TupleData[2] << 8); + break; + case SSB_PCMCIA_CIS_BOARDREV: + GOTO_ERROR_ON(tuple->TupleDataLen != 2, + "boardrev tpl size"); + sprom->board_rev = tuple->TupleData[1]; + break; + case SSB_PCMCIA_CIS_PA: + GOTO_ERROR_ON((tuple->TupleDataLen != 9) && + (tuple->TupleDataLen != 10), + "pa tpl size"); + sprom->pa0b0 = tuple->TupleData[1] | + ((u16)tuple->TupleData[2] << 8); + sprom->pa0b1 = tuple->TupleData[3] | + ((u16)tuple->TupleData[4] << 8); + sprom->pa0b2 = tuple->TupleData[5] | + ((u16)tuple->TupleData[6] << 8); + sprom->itssi_a = tuple->TupleData[7]; + sprom->itssi_bg = tuple->TupleData[7]; + sprom->maxpwr_a = tuple->TupleData[8]; + sprom->maxpwr_bg = tuple->TupleData[8]; + break; + case SSB_PCMCIA_CIS_OEMNAME: + /* We ignore this. */ + break; + case SSB_PCMCIA_CIS_CCODE: + GOTO_ERROR_ON(tuple->TupleDataLen != 2, + "ccode tpl size"); + sprom->country_code = tuple->TupleData[1]; + break; + case SSB_PCMCIA_CIS_ANTENNA: + GOTO_ERROR_ON(tuple->TupleDataLen != 2, + "ant tpl size"); + sprom->ant_available_a = tuple->TupleData[1]; + sprom->ant_available_bg = tuple->TupleData[1]; + break; + case SSB_PCMCIA_CIS_ANTGAIN: + GOTO_ERROR_ON(tuple->TupleDataLen != 2, + "antg tpl size"); + sprom->antenna_gain.ghz24.a0 = tuple->TupleData[1]; + sprom->antenna_gain.ghz24.a1 = tuple->TupleData[1]; + sprom->antenna_gain.ghz24.a2 = tuple->TupleData[1]; + sprom->antenna_gain.ghz24.a3 = tuple->TupleData[1]; + sprom->antenna_gain.ghz5.a0 = tuple->TupleData[1]; + sprom->antenna_gain.ghz5.a1 = tuple->TupleData[1]; + sprom->antenna_gain.ghz5.a2 = tuple->TupleData[1]; + sprom->antenna_gain.ghz5.a3 = tuple->TupleData[1]; + break; + case SSB_PCMCIA_CIS_BFLAGS: + GOTO_ERROR_ON((tuple->TupleDataLen != 3) && + (tuple->TupleDataLen != 5), + "bfl tpl size"); + sprom->boardflags_lo = tuple->TupleData[1] | + ((u16)tuple->TupleData[2] << 8); + break; + case SSB_PCMCIA_CIS_LEDS: + GOTO_ERROR_ON(tuple->TupleDataLen != 5, + "leds tpl size"); + sprom->gpio0 = tuple->TupleData[1]; + sprom->gpio1 = tuple->TupleData[2]; + sprom->gpio2 = tuple->TupleData[3]; + sprom->gpio3 = tuple->TupleData[4]; + break; + } + return -ENOSPC; /* continue with next entry */ + +error: + ssb_printk(KERN_ERR PFX + "PCMCIA: Failed to fetch device invariants: %s\n", + error_description); + return -ENODEV; +} + + +int ssb_pcmcia_get_invariants(struct ssb_bus *bus, + struct ssb_init_invariants *iv) +{ + struct ssb_sprom *sprom = &iv->sprom; + int res; + memset(sprom, 0xFF, sizeof(*sprom)); sprom->revision = 1; sprom->boardflags_lo = 0; sprom->boardflags_hi = 0; /* First fetch the MAC address. */ - memset(&tuple, 0, sizeof(tuple)); - tuple.DesiredTuple = CISTPL_FUNCE; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - res = pcmcia_get_first_tuple(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "MAC first tpl"); - res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "MAC first tpl data"); - while (1) { - GOTO_ERROR_ON(tuple.TupleDataLen < 1, "MAC tpl < 1"); - if (tuple.TupleData[0] == CISTPL_FUNCE_LAN_NODE_ID) - break; - res = pcmcia_get_next_tuple(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "MAC next tpl"); - res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "MAC next tpl data"); + res = pcmcia_loop_tuple(bus->host_pcmcia, CISTPL_FUNCE, + ssb_pcmcia_get_mac, sprom); + if (res != 0) { + ssb_printk(KERN_ERR PFX + "PCMCIA: Failed to fetch MAC address\n"); + return -ENODEV; } - GOTO_ERROR_ON(tuple.TupleDataLen != ETH_ALEN + 2, "MAC tpl size"); - memcpy(sprom->il0mac, &tuple.TupleData[2], ETH_ALEN); /* Fetch the vendor specific tuples. */ - memset(&tuple, 0, sizeof(tuple)); - tuple.DesiredTuple = SSB_PCMCIA_CIS; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - res = pcmcia_get_first_tuple(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "VEN first tpl"); - res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "VEN first tpl data"); - while (1) { - GOTO_ERROR_ON(tuple.TupleDataLen < 1, "VEN tpl < 1"); - switch (tuple.TupleData[0]) { - case SSB_PCMCIA_CIS_ID: - GOTO_ERROR_ON((tuple.TupleDataLen != 5) && - (tuple.TupleDataLen != 7), - "id tpl size"); - bi->vendor = tuple.TupleData[1] | - ((u16)tuple.TupleData[2] << 8); - break; - case SSB_PCMCIA_CIS_BOARDREV: - GOTO_ERROR_ON(tuple.TupleDataLen != 2, - "boardrev tpl size"); - sprom->board_rev = tuple.TupleData[1]; - break; - case SSB_PCMCIA_CIS_PA: - GOTO_ERROR_ON((tuple.TupleDataLen != 9) && - (tuple.TupleDataLen != 10), - "pa tpl size"); - sprom->pa0b0 = tuple.TupleData[1] | - ((u16)tuple.TupleData[2] << 8); - sprom->pa0b1 = tuple.TupleData[3] | - ((u16)tuple.TupleData[4] << 8); - sprom->pa0b2 = tuple.TupleData[5] | - ((u16)tuple.TupleData[6] << 8); - sprom->itssi_a = tuple.TupleData[7]; - sprom->itssi_bg = tuple.TupleData[7]; - sprom->maxpwr_a = tuple.TupleData[8]; - sprom->maxpwr_bg = tuple.TupleData[8]; - break; - case SSB_PCMCIA_CIS_OEMNAME: - /* We ignore this. */ - break; - case SSB_PCMCIA_CIS_CCODE: - GOTO_ERROR_ON(tuple.TupleDataLen != 2, - "ccode tpl size"); - sprom->country_code = tuple.TupleData[1]; - break; - case SSB_PCMCIA_CIS_ANTENNA: - GOTO_ERROR_ON(tuple.TupleDataLen != 2, - "ant tpl size"); - sprom->ant_available_a = tuple.TupleData[1]; - sprom->ant_available_bg = tuple.TupleData[1]; - break; - case SSB_PCMCIA_CIS_ANTGAIN: - GOTO_ERROR_ON(tuple.TupleDataLen != 2, - "antg tpl size"); - sprom->antenna_gain.ghz24.a0 = tuple.TupleData[1]; - sprom->antenna_gain.ghz24.a1 = tuple.TupleData[1]; - sprom->antenna_gain.ghz24.a2 = tuple.TupleData[1]; - sprom->antenna_gain.ghz24.a3 = tuple.TupleData[1]; - sprom->antenna_gain.ghz5.a0 = tuple.TupleData[1]; - sprom->antenna_gain.ghz5.a1 = tuple.TupleData[1]; - sprom->antenna_gain.ghz5.a2 = tuple.TupleData[1]; - sprom->antenna_gain.ghz5.a3 = tuple.TupleData[1]; - break; - case SSB_PCMCIA_CIS_BFLAGS: - GOTO_ERROR_ON((tuple.TupleDataLen != 3) && - (tuple.TupleDataLen != 5), - "bfl tpl size"); - sprom->boardflags_lo = tuple.TupleData[1] | - ((u16)tuple.TupleData[2] << 8); - break; - case SSB_PCMCIA_CIS_LEDS: - GOTO_ERROR_ON(tuple.TupleDataLen != 5, - "leds tpl size"); - sprom->gpio0 = tuple.TupleData[1]; - sprom->gpio1 = tuple.TupleData[2]; - sprom->gpio2 = tuple.TupleData[3]; - sprom->gpio3 = tuple.TupleData[4]; - break; - } - res = pcmcia_get_next_tuple(bus->host_pcmcia, &tuple); - if (res == -ENOSPC) - break; - GOTO_ERROR_ON(res != 0, "VEN next tpl"); - res = pcmcia_get_tuple_data(bus->host_pcmcia, &tuple); - GOTO_ERROR_ON(res != 0, "VEN next tpl data"); - } + res = pcmcia_loop_tuple(bus->host_pcmcia, SSB_PCMCIA_CIS, + ssb_pcmcia_do_get_invariants, sprom); + if ((res == 0) || (res == -ENOSPC)) + return 0; - return 0; -error: ssb_printk(KERN_ERR PFX - "PCMCIA: Failed to fetch device invariants: %s\n", - error_description); + "PCMCIA: Failed to fetch device invariants\n"); return -ENODEV; } -- cgit v0.10.2 From 18b61b97294dad74dd00a1aa8efed0cfacb95aff Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Oct 2009 23:57:58 +0200 Subject: pcmcia: convert pcmciamtd driver to use new CIS helpers Convert the (broken) pcmciamtd driver to use the new CIS helpers. CC: David.Woodhouse@intel.com CC: linux-mtd@lists.infradead.org Signed-off-by: Dominik Brodowski diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index d600c2d..5c3546e 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -354,101 +354,110 @@ static void pcmciamtd_release(struct pcmcia_device *link) } -static void card_settings(struct pcmciamtd_dev *dev, struct pcmcia_device *link, int *new_name) +#ifdef CONFIG_MTD_DEBUG +static int pcmciamtd_cistpl_format(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data) { - int rc; - tuple_t tuple; cisparse_t parse; - u_char buf[64]; - - tuple.Attributes = 0; - tuple.TupleData = (cisdata_t *)buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - tuple.DesiredTuple = RETURN_FIRST_TUPLE; - - rc = pcmcia_get_first_tuple(link, &tuple); - while (rc == 0) { - rc = pcmcia_get_tuple_data(link, &tuple); - if (rc != 0) { - cs_error(link, GetTupleData, rc); - break; - } - rc = pcmcia_parse_tuple(&tuple, &parse); - if (rc != 0) { - cs_error(link, ParseTuple, rc); - break; - } - switch(tuple.TupleCode) { - case CISTPL_FORMAT: { - cistpl_format_t *t = &parse.format; - (void)t; /* Shut up, gcc */ - DEBUG(2, "Format type: %u, Error Detection: %u, offset = %u, length =%u", - t->type, t->edc, t->offset, t->length); - break; + if (!pcmcia_parse_tuple(tuple, &parse)) { + cistpl_format_t *t = &parse.format; + (void)t; /* Shut up, gcc */ + DEBUG(2, "Format type: %u, Error Detection: %u, offset = %u, length =%u", + t->type, t->edc, t->offset, t->length); + } + return -ENOSPC; +} - } +static int pcmciamtd_cistpl_jedec(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data) +{ + cisparse_t parse; + int i; - case CISTPL_DEVICE: { - cistpl_device_t *t = &parse.device; - int i; - DEBUG(2, "Common memory:"); - dev->pcmcia_map.size = t->dev[0].size; - for(i = 0; i < t->ndev; i++) { - DEBUG(2, "Region %d, type = %u", i, t->dev[i].type); - DEBUG(2, "Region %d, wp = %u", i, t->dev[i].wp); - DEBUG(2, "Region %d, speed = %u ns", i, t->dev[i].speed); - DEBUG(2, "Region %d, size = %u bytes", i, t->dev[i].size); - } - break; - } + if (!pcmcia_parse_tuple(tuple, &parse)) { + cistpl_jedec_t *t = &parse.jedec; + for (i = 0; i < t->nid; i++) + DEBUG(2, "JEDEC: 0x%02x 0x%02x", t->id[i].mfr, t->id[i].info); + } + return -ENOSPC; +} +#endif - case CISTPL_VERS_1: { - cistpl_vers_1_t *t = &parse.version_1; - int i; - if(t->ns) { - dev->mtd_name[0] = '\0'; - for(i = 0; i < t->ns; i++) { - if(i) - strcat(dev->mtd_name, " "); - strcat(dev->mtd_name, t->str+t->ofs[i]); - } - } - DEBUG(2, "Found name: %s", dev->mtd_name); - break; - } +static int pcmciamtd_cistpl_device(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data) +{ + struct pcmciamtd_dev *dev = priv_data; + cisparse_t parse; + cistpl_device_t *t = &parse.device; + int i; - case CISTPL_JEDEC_C: { - cistpl_jedec_t *t = &parse.jedec; - int i; - for(i = 0; i < t->nid; i++) { - DEBUG(2, "JEDEC: 0x%02x 0x%02x", t->id[i].mfr, t->id[i].info); - } - break; - } + if (pcmcia_parse_tuple(tuple, &parse)) + return -EINVAL; + + DEBUG(2, "Common memory:"); + dev->pcmcia_map.size = t->dev[0].size; + /* from here on: DEBUG only */ + for (i = 0; i < t->ndev; i++) { + DEBUG(2, "Region %d, type = %u", i, t->dev[i].type); + DEBUG(2, "Region %d, wp = %u", i, t->dev[i].wp); + DEBUG(2, "Region %d, speed = %u ns", i, t->dev[i].speed); + DEBUG(2, "Region %d, size = %u bytes", i, t->dev[i].size); + } + return 0; +} - case CISTPL_DEVICE_GEO: { - cistpl_device_geo_t *t = &parse.device_geo; - int i; - dev->pcmcia_map.bankwidth = t->geo[0].buswidth; - for(i = 0; i < t->ngeo; i++) { - DEBUG(2, "region: %d bankwidth = %u", i, t->geo[i].buswidth); - DEBUG(2, "region: %d erase_block = %u", i, t->geo[i].erase_block); - DEBUG(2, "region: %d read_block = %u", i, t->geo[i].read_block); - DEBUG(2, "region: %d write_block = %u", i, t->geo[i].write_block); - DEBUG(2, "region: %d partition = %u", i, t->geo[i].partition); - DEBUG(2, "region: %d interleave = %u", i, t->geo[i].interleave); - } - break; - } +static int pcmciamtd_cistpl_geo(struct pcmcia_device *p_dev, + tuple_t *tuple, + void *priv_data) +{ + struct pcmciamtd_dev *dev = priv_data; + cisparse_t parse; + cistpl_device_geo_t *t = &parse.device_geo; + int i; - default: - DEBUG(2, "Unknown tuple code %d", tuple.TupleCode); - } + if (pcmcia_parse_tuple(tuple, &parse)) + return -EINVAL; + + dev->pcmcia_map.bankwidth = t->geo[0].buswidth; + /* from here on: DEBUG only */ + for (i = 0; i < t->ngeo; i++) { + DEBUG(2, "region: %d bankwidth = %u", i, t->geo[i].buswidth); + DEBUG(2, "region: %d erase_block = %u", i, t->geo[i].erase_block); + DEBUG(2, "region: %d read_block = %u", i, t->geo[i].read_block); + DEBUG(2, "region: %d write_block = %u", i, t->geo[i].write_block); + DEBUG(2, "region: %d partition = %u", i, t->geo[i].partition); + DEBUG(2, "region: %d interleave = %u", i, t->geo[i].interleave); + } + return 0; +} + + +static void card_settings(struct pcmciamtd_dev *dev, struct pcmcia_device *link, int *new_name) +{ + int i; - rc = pcmcia_get_next_tuple(link, &tuple); + if (p_dev->prod_id[0]) { + dev->mtd_name[0] = '\0'; + for (i = 0; i < 4; i++) { + if (i) + strcat(dev->mtd_name, " "); + if (p_dev->prod_id[i]) + strcat(dev->mtd_name, p_dev->prod_id[i]); + } + DEBUG(2, "Found name: %s", dev->mtd_name); } + +#ifdef CONFIG_MTD_DEBUG + pcmcia_loop_tuple(p_dev, CISTPL_FORMAT, pcmciamtd_cistpl_format, NULL); + pcmcia_loop_tuple(p_dev, CISTPL_JEDEC_C, pcmciamtd_cistpl_jedec, NULL); +#endif + pcmcia_loop_tuple(p_dev, CISTPL_DEVICE, pcmciamtd_cistpl_device, dev); + pcmcia_loop_tuple(p_dev, CISTPL_DEVICE_GEO, pcmciamtd_cistpl_geo, dev); + if(!dev->pcmcia_map.size) dev->pcmcia_map.size = MAX_PCMCIA_ADDR; -- cgit v0.10.2 From 18a7a19b37838789452e0bd2855a51475628b971 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 19 Oct 2009 00:07:39 +0200 Subject: pcmcia: remove pcmcia_get_{first,next}_tuple() Remove the pcmcia_get_{first,next}_tuple() calls no longer needed by (current) pcmcia device drivers. Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 06a14c9..70432ca 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -206,6 +206,15 @@ int pccard_loop_tuple(struct pcmcia_socket *s, unsigned int function, cisparse_t *parse, void *priv_data)); +int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); + +int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, + tuple_t *tuple); + +int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); + + /* rsrc_mgr.c */ int pcmcia_validate_mem(struct pcmcia_socket *s); struct resource *pcmcia_find_io_region(unsigned long base, diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 2eb6e24..6c37d4e 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -229,21 +229,6 @@ int pcmcia_reset_card(struct pcmcia_socket *skt); int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, conf_reg_t *reg); -/* deprecated -- do not use in drivers. */ -int pccard_get_first_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_first_tuple(p_dev, tuple) \ - pccard_get_first_tuple(p_dev->socket, p_dev->func, tuple) - -int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, - tuple_t *tuple); -#define pcmcia_get_next_tuple(p_dev, tuple) \ - pccard_get_next_tuple(p_dev->socket, p_dev->func, tuple) - -int pccard_get_tuple_data(struct pcmcia_socket *s, tuple_t *tuple); -#define pcmcia_get_tuple_data(p_dev, tuple) \ - pccard_get_tuple_data(p_dev->socket, tuple) - /* device configuration */ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req); int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); -- cgit v0.10.2 From d50dbec3ce52e1608636b8a624d087da9ced8cde Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 23 Oct 2009 12:51:28 +0200 Subject: pcmcia: use dynamic debug instead of custom infrastructure Use the generic "dynamic debug" infrastructure instead of CONIG_PCMCIA_DEBUG in the PCMCIA core (pcmcia.ko and pcmcia_core.ko). To enable debugging, enable CONFIG_DYNAMIC_DEBUG, mount debugfs and $ echo -n 'module pcmcia_core +p' > /sys/kernel/debug/dynamic_debug/control for the complete module "pcmcia_core", for example. For more detailled instructions, please see Documentation/dynamic-debug-howto.txt Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index db77e1f..4cd70d0 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -91,7 +91,7 @@ static u_int xlate_rom_addr(void __iomem *b, u_int addr) static void cb_release_cis_mem(struct pcmcia_socket * s) { if (s->cb_cis_virt) { - cs_dbg(s, 1, "cb_release_cis_mem()\n"); + dev_dbg(&s->dev, "cb_release_cis_mem()\n"); iounmap(s->cb_cis_virt); s->cb_cis_virt = NULL; s->cb_cis_res = NULL; @@ -132,7 +132,7 @@ int read_cb_mem(struct pcmcia_socket * s, int space, u_int addr, u_int len, void struct pci_dev *dev; struct resource *res; - cs_dbg(s, 3, "read_cb_mem(%d, %#x, %u)\n", space, addr, len); + dev_dbg(&s->dev, "read_cb_mem(%d, %#x, %u)\n", space, addr, len); dev = pci_get_slot(s->cb_dev->subordinate, 0); if (!dev) diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c index 24dd3c1..8c1b73c 100644 --- a/drivers/pcmcia/cistpl.c +++ b/drivers/pcmcia/cistpl.c @@ -138,7 +138,7 @@ int pcmcia_read_cis_mem(struct pcmcia_socket *s, int attr, u_int addr, void __iomem *sys, *end; unsigned char *buf = ptr; - cs_dbg(s, 3, "pcmcia_read_cis_mem(%d, %#x, %u)\n", attr, addr, len); + dev_dbg(&s->dev, "pcmcia_read_cis_mem(%d, %#x, %u)\n", attr, addr, len); if (attr & IS_INDIRECT) { /* Indirect accesses use a bunch of special registers at fixed @@ -190,7 +190,7 @@ int pcmcia_read_cis_mem(struct pcmcia_socket *s, int attr, u_int addr, addr = 0; } } - cs_dbg(s, 3, " %#2.2x %#2.2x %#2.2x %#2.2x ...\n", + dev_dbg(&s->dev, " %#2.2x %#2.2x %#2.2x %#2.2x ...\n", *(u_char *)(ptr+0), *(u_char *)(ptr+1), *(u_char *)(ptr+2), *(u_char *)(ptr+3)); return 0; @@ -204,7 +204,7 @@ void pcmcia_write_cis_mem(struct pcmcia_socket *s, int attr, u_int addr, void __iomem *sys, *end; unsigned char *buf = ptr; - cs_dbg(s, 3, "pcmcia_write_cis_mem(%d, %#x, %u)\n", attr, addr, len); + dev_dbg(&s->dev, "pcmcia_write_cis_mem(%d, %#x, %u)\n", attr, addr, len); if (attr & IS_INDIRECT) { /* Indirect accesses use a bunch of special registers at fixed @@ -584,7 +584,7 @@ int pccard_get_next_tuple(struct pcmcia_socket *s, unsigned int function, tuple_ ofs += link[1] + 2; } if (i == MAX_TUPLES) { - cs_dbg(s, 1, "cs: overrun in pcmcia_get_next_tuple\n"); + dev_dbg(&s->dev, "cs: overrun in pcmcia_get_next_tuple\n"); return -ENOSPC; } @@ -1440,7 +1440,7 @@ int pcmcia_parse_tuple(tuple_t *tuple, cisparse_t *parse) break; } if (ret) - __cs_dbg(0, "parse_tuple failed %d\n", ret); + pr_debug("parse_tuple failed %d\n", ret); return ret; } EXPORT_SYMBOL(pcmcia_parse_tuple); diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index 934d4be..b0ec9c6 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -61,17 +61,6 @@ INT_MODULE_PARM(unreset_limit, 30); /* unreset_check's */ /* Access speed for attribute memory windows */ INT_MODULE_PARM(cis_speed, 300); /* ns */ -#ifdef CONFIG_PCMCIA_DEBUG -static int pc_debug; - -module_param(pc_debug, int, 0644); - -int cs_debug_level(int level) -{ - return pc_debug > level; -} -#endif - socket_state_t dead_socket = { .csc_mask = SS_DETECT, @@ -182,7 +171,7 @@ int pcmcia_register_socket(struct pcmcia_socket *socket) if (!socket || !socket->ops || !socket->dev.parent || !socket->resource_ops) return -EINVAL; - cs_dbg(socket, 0, "pcmcia_register_socket(0x%p)\n", socket->ops); + dev_dbg(&socket->dev, "pcmcia_register_socket(0x%p)\n", socket->ops); spin_lock_init(&socket->lock); @@ -274,7 +263,7 @@ void pcmcia_unregister_socket(struct pcmcia_socket *socket) if (!socket) return; - cs_dbg(socket, 0, "pcmcia_unregister_socket(0x%p)\n", socket->ops); + dev_dbg(&socket->dev, "pcmcia_unregister_socket(0x%p)\n", socket->ops); if (socket->thread) kthread_stop(socket->thread); @@ -327,7 +316,7 @@ static int send_event(struct pcmcia_socket *s, event_t event, int priority) if (s->state & SOCKET_CARDBUS) return 0; - cs_dbg(s, 1, "send_event(event %d, pri %d, callback 0x%p)\n", + dev_dbg(&s->dev, "send_event(event %d, pri %d, callback 0x%p)\n", event, priority, s->callback); if (!s->callback) @@ -344,7 +333,7 @@ static int send_event(struct pcmcia_socket *s, event_t event, int priority) static void socket_remove_drivers(struct pcmcia_socket *skt) { - cs_dbg(skt, 4, "remove_drivers\n"); + dev_dbg(&skt->dev, "remove_drivers\n"); send_event(skt, CS_EVENT_CARD_REMOVAL, CS_EVENT_PRI_HIGH); } @@ -353,7 +342,7 @@ static int socket_reset(struct pcmcia_socket *skt) { int status, i; - cs_dbg(skt, 4, "reset\n"); + dev_dbg(&skt->dev, "reset\n"); skt->socket.flags |= SS_OUTPUT_ENA | SS_RESET; skt->ops->set_socket(skt, &skt->socket); @@ -375,7 +364,7 @@ static int socket_reset(struct pcmcia_socket *skt) msleep(unreset_check * 10); } - cs_err(skt, "time out after reset.\n"); + dev_printk(KERN_ERR, &skt->dev, "time out after reset.\n"); return -ETIMEDOUT; } @@ -389,7 +378,7 @@ static void socket_shutdown(struct pcmcia_socket *s) { int status; - cs_dbg(s, 4, "shutdown\n"); + dev_dbg(&s->dev, "shutdown\n"); socket_remove_drivers(s); s->state &= SOCKET_INUSE | SOCKET_PRESENT; @@ -424,7 +413,7 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) { int status, i; - cs_dbg(skt, 4, "setup\n"); + dev_dbg(&skt->dev, "setup\n"); skt->ops->get_status(skt, &status); if (!(status & SS_DETECT)) @@ -444,13 +433,15 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) } if (status & SS_PENDING) { - cs_err(skt, "voltage interrogation timed out.\n"); + dev_printk(KERN_ERR, &skt->dev, + "voltage interrogation timed out.\n"); return -ETIMEDOUT; } if (status & SS_CARDBUS) { if (!(skt->features & SS_CAP_CARDBUS)) { - cs_err(skt, "cardbus cards are not supported.\n"); + dev_printk(KERN_ERR, &skt->dev, + "cardbus cards are not supported.\n"); return -EINVAL; } skt->state |= SOCKET_CARDBUS; @@ -464,7 +455,7 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) else if (!(status & SS_XVCARD)) skt->socket.Vcc = skt->socket.Vpp = 50; else { - cs_err(skt, "unsupported voltage key.\n"); + dev_printk(KERN_ERR, &skt->dev, "unsupported voltage key.\n"); return -EIO; } @@ -481,7 +472,7 @@ static int socket_setup(struct pcmcia_socket *skt, int initial_delay) skt->ops->get_status(skt, &status); if (!(status & SS_POWERON)) { - cs_err(skt, "unable to apply power.\n"); + dev_printk(KERN_ERR, &skt->dev, "unable to apply power.\n"); return -EIO; } @@ -501,7 +492,7 @@ static int socket_insert(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 4, "insert\n"); + dev_dbg(&skt->dev, "insert\n"); if (!cs_socket_get(skt)) return -ENODEV; @@ -521,7 +512,7 @@ static int socket_insert(struct pcmcia_socket *skt) skt->state |= SOCKET_CARDBUS_CONFIG; } #endif - cs_dbg(skt, 4, "insert done\n"); + dev_dbg(&skt->dev, "insert done\n"); send_event(skt, CS_EVENT_CARD_INSERTION, CS_EVENT_PRI_LOW); } else { @@ -573,7 +564,7 @@ static int socket_resume(struct pcmcia_socket *skt) * FIXME: need a better check here for cardbus cards. */ if (verify_cis_cache(skt) != 0) { - cs_dbg(skt, 4, "cis mismatch - different card\n"); + dev_dbg(&skt->dev, "cis mismatch - different card\n"); socket_remove_drivers(skt); destroy_cis_cache(skt); /* @@ -584,7 +575,7 @@ static int socket_resume(struct pcmcia_socket *skt) msleep(200); send_event(skt, CS_EVENT_CARD_INSERTION, CS_EVENT_PRI_LOW); } else { - cs_dbg(skt, 4, "cis matches cache\n"); + dev_dbg(&skt->dev, "cis matches cache\n"); send_event(skt, CS_EVENT_PM_RESUME, CS_EVENT_PRI_LOW); } } else { @@ -706,7 +697,7 @@ static int pccardd(void *__skt) void pcmcia_parse_events(struct pcmcia_socket *s, u_int events) { unsigned long flags; - cs_dbg(s, 4, "parse_events: events %08x\n", events); + dev_dbg(&s->dev, "parse_events: events %08x\n", events); if (s->thread) { spin_lock_irqsave(&s->thread_lock, flags); s->thread_events |= events; @@ -756,7 +747,7 @@ int pcmcia_reset_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "resetting socket\n"); + dev_dbg(&skt->dev, "resetting socket\n"); mutex_lock(&skt->skt_mutex); do { @@ -801,7 +792,7 @@ int pcmcia_suspend_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "suspending socket\n"); + dev_dbg(&skt->dev, "suspending socket\n"); mutex_lock(&skt->skt_mutex); do { @@ -831,7 +822,7 @@ int pcmcia_resume_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "waking up socket\n"); + dev_dbg(&skt->dev, "waking up socket\n"); mutex_lock(&skt->skt_mutex); do { @@ -859,7 +850,7 @@ int pcmcia_eject_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "user eject request\n"); + dev_dbg(&skt->dev, "user eject request\n"); mutex_lock(&skt->skt_mutex); do { @@ -888,7 +879,7 @@ int pcmcia_insert_card(struct pcmcia_socket *skt) { int ret; - cs_dbg(skt, 1, "user insert request\n"); + dev_dbg(&skt->dev, "user insert request\n"); mutex_lock(&skt->skt_mutex); do { diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 70432ca..0a3ada9 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -107,28 +107,6 @@ static inline void cs_socket_put(struct pcmcia_socket *skt) } } -#ifdef CONFIG_PCMCIA_DEBUG -extern int cs_debug_level(int); - -#define cs_dbg(skt, lvl, fmt, arg...) do { \ - if (cs_debug_level(lvl)) \ - dev_printk(KERN_DEBUG, &skt->dev, \ - "cs: " fmt, ## arg); \ -} while (0) -#define __cs_dbg(lvl, fmt, arg...) do { \ - if (cs_debug_level(lvl)) \ - printk(KERN_DEBUG \ - "cs: " fmt, ## arg); \ -} while (0) - -#else -#define cs_dbg(skt, lvl, fmt, arg...) do { } while (0) -#define __cs_dbg(lvl, fmt, arg...) do { } while (0) -#endif - -#define cs_err(skt, fmt, arg...) \ - dev_printk(KERN_ERR, &skt->dev, "cs: " fmt, ## arg) - /* * Stuff internal to module "pcmcia_core": diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index f5b7079..5b069ae 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -41,23 +41,6 @@ MODULE_AUTHOR("David Hinds "); MODULE_DESCRIPTION("PCMCIA Driver Services"); MODULE_LICENSE("GPL"); -#ifdef CONFIG_PCMCIA_DEBUG -int ds_pc_debug; - -module_param_named(pc_debug, ds_pc_debug, int, 0644); - -#define ds_dbg(lvl, fmt, arg...) do { \ - if (ds_pc_debug > (lvl)) \ - printk(KERN_DEBUG "ds: " fmt , ## arg); \ -} while (0) -#define ds_dev_dbg(lvl, dev, fmt, arg...) do { \ - if (ds_pc_debug > (lvl)) \ - dev_printk(KERN_DEBUG, dev, "ds: " fmt , ## arg); \ -} while (0) -#else -#define ds_dbg(lvl, fmt, arg...) do { } while (0) -#define ds_dev_dbg(lvl, dev, fmt, arg...) do { } while (0) -#endif spinlock_t pcmcia_dev_list_lock; @@ -303,7 +286,7 @@ int pcmcia_register_driver(struct pcmcia_driver *driver) spin_lock_init(&driver->dynids.lock); INIT_LIST_HEAD(&driver->dynids.list); - ds_dbg(3, "registering driver %s\n", driver->drv.name); + pr_debug("registering driver %s\n", driver->drv.name); error = driver_register(&driver->drv); if (error < 0) @@ -323,7 +306,7 @@ EXPORT_SYMBOL(pcmcia_register_driver); */ void pcmcia_unregister_driver(struct pcmcia_driver *driver) { - ds_dbg(3, "unregistering driver %s\n", driver->drv.name); + pr_debug("unregistering driver %s\n", driver->drv.name); driver_unregister(&driver->drv); pcmcia_free_dynids(driver); } @@ -350,14 +333,14 @@ void pcmcia_put_dev(struct pcmcia_device *p_dev) static void pcmcia_release_function(struct kref *ref) { struct config_t *c = container_of(ref, struct config_t, ref); - ds_dbg(1, "releasing config_t\n"); + pr_debug("releasing config_t\n"); kfree(c); } static void pcmcia_release_dev(struct device *dev) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); - ds_dev_dbg(1, dev, "releasing device\n"); + dev_dbg(dev, "releasing device\n"); pcmcia_put_socket(p_dev->socket); kfree(p_dev->devname); kref_put(&p_dev->function_config->ref, pcmcia_release_function); @@ -367,7 +350,7 @@ static void pcmcia_release_dev(struct device *dev) static void pcmcia_add_device_later(struct pcmcia_socket *s, int mfc) { if (!s->pcmcia_state.device_add_pending) { - ds_dev_dbg(1, &s->dev, "scheduling to add %s secondary" + dev_dbg(&s->dev, "scheduling to add %s secondary" " device to %d\n", mfc ? "mfc" : "pfc", s->sock); s->pcmcia_state.device_add_pending = 1; s->pcmcia_state.mfc_pfc = mfc; @@ -405,7 +388,7 @@ static int pcmcia_device_probe(struct device * dev) */ did = dev_get_drvdata(&p_dev->dev); - ds_dev_dbg(1, dev, "trying to bind to %s\n", p_drv->drv.name); + dev_dbg(dev, "trying to bind to %s\n", p_drv->drv.name); if ((!p_drv->probe) || (!p_dev->function_config) || (!try_module_get(p_drv->owner))) { @@ -428,7 +411,7 @@ static int pcmcia_device_probe(struct device * dev) ret = p_drv->probe(p_dev); if (ret) { - ds_dev_dbg(1, dev, "binding to %s failed with %d\n", + dev_dbg(dev, "binding to %s failed with %d\n", p_drv->drv.name, ret); goto put_module; } @@ -456,7 +439,7 @@ static void pcmcia_card_remove(struct pcmcia_socket *s, struct pcmcia_device *le struct pcmcia_device *tmp; unsigned long flags; - ds_dev_dbg(2, leftover ? &leftover->dev : &s->dev, + dev_dbg(leftover ? &leftover->dev : &s->dev, "pcmcia_card_remove(%d) %s\n", s->sock, leftover ? leftover->devname : ""); @@ -475,7 +458,7 @@ static void pcmcia_card_remove(struct pcmcia_socket *s, struct pcmcia_device *le p_dev->_removed=1; spin_unlock_irqrestore(&pcmcia_dev_list_lock, flags); - ds_dev_dbg(2, &p_dev->dev, "unregistering device\n"); + dev_dbg(&p_dev->dev, "unregistering device\n"); device_unregister(&p_dev->dev); } @@ -492,7 +475,7 @@ static int pcmcia_device_remove(struct device * dev) p_dev = to_pcmcia_dev(dev); p_drv = to_pcmcia_drv(dev->driver); - ds_dev_dbg(1, dev, "removing device\n"); + dev_dbg(dev, "removing device\n"); /* If we're removing the primary module driving a * pseudo multi-function card, we need to unbind @@ -572,7 +555,7 @@ static int pcmcia_device_query(struct pcmcia_device *p_dev) } if (!pccard_read_tuple(p_dev->socket, p_dev->func, CISTPL_DEVICE_GEO, devgeo)) { - ds_dev_dbg(0, &p_dev->dev, + dev_dbg(&p_dev->dev, "mem device geometry probably means " "FUNCID_MEMORY\n"); p_dev->func_id = CISTPL_FUNCID_MEMORY; @@ -628,7 +611,7 @@ struct pcmcia_device * pcmcia_device_add(struct pcmcia_socket *s, unsigned int f mutex_lock(&device_add_lock); - ds_dbg(3, "adding device to %d, function %d\n", s->sock, function); + pr_debug("adding device to %d, function %d\n", s->sock, function); /* max of 4 devices per card */ if (s->device_count == 4) @@ -654,7 +637,7 @@ struct pcmcia_device * pcmcia_device_add(struct pcmcia_socket *s, unsigned int f p_dev->devname = kasprintf(GFP_KERNEL, "pcmcia%s", dev_name(&p_dev->dev)); if (!p_dev->devname) goto err_free; - ds_dev_dbg(3, &p_dev->dev, "devname is %s\n", p_dev->devname); + dev_dbg(&p_dev->dev, "devname is %s\n", p_dev->devname); spin_lock_irqsave(&pcmcia_dev_list_lock, flags); @@ -677,7 +660,7 @@ struct pcmcia_device * pcmcia_device_add(struct pcmcia_socket *s, unsigned int f spin_unlock_irqrestore(&pcmcia_dev_list_lock, flags); if (!p_dev->function_config) { - ds_dev_dbg(3, &p_dev->dev, "creating config_t\n"); + dev_dbg(&p_dev->dev, "creating config_t\n"); p_dev->function_config = kzalloc(sizeof(struct config_t), GFP_KERNEL); if (!p_dev->function_config) @@ -722,20 +705,20 @@ static int pcmcia_card_add(struct pcmcia_socket *s) int ret = 0; if (!(s->resource_setup_done)) { - ds_dev_dbg(3, &s->dev, + dev_dbg(&s->dev, "no resources available, delaying card_add\n"); return -EAGAIN; /* try again, but later... */ } if (pcmcia_validate_mem(s)) { - ds_dev_dbg(3, &s->dev, "validating mem resources failed, " + dev_dbg(&s->dev, "validating mem resources failed, " "delaying card_add\n"); return -EAGAIN; /* try again, but later... */ } ret = pccard_validate_cis(s, &no_chains); if (ret || !no_chains) { - ds_dev_dbg(0, &s->dev, "invalid CIS or invalid resources\n"); + dev_dbg(&s->dev, "invalid CIS or invalid resources\n"); return -ENODEV; } @@ -756,7 +739,7 @@ static void pcmcia_delayed_add_device(struct work_struct *work) { struct pcmcia_socket *s = container_of(work, struct pcmcia_socket, device_add); - ds_dev_dbg(1, &s->dev, "adding additional device to %d\n", s->sock); + dev_dbg(&s->dev, "adding additional device to %d\n", s->sock); pcmcia_device_add(s, s->pcmcia_state.mfc_pfc); s->pcmcia_state.device_add_pending = 0; s->pcmcia_state.mfc_pfc = 0; @@ -766,7 +749,7 @@ static int pcmcia_requery(struct device *dev, void * _data) { struct pcmcia_device *p_dev = to_pcmcia_dev(dev); if (!p_dev->dev.driver) { - ds_dev_dbg(1, dev, "update device information\n"); + dev_dbg(dev, "update device information\n"); pcmcia_device_query(p_dev); } @@ -780,7 +763,7 @@ static void pcmcia_bus_rescan(struct pcmcia_socket *skt, int new_cis) unsigned long flags; /* must be called with skt_mutex held */ - ds_dev_dbg(0, &skt->dev, "re-scanning socket %d\n", skt->sock); + dev_dbg(&skt->dev, "re-scanning socket %d\n", skt->sock); spin_lock_irqsave(&pcmcia_dev_list_lock, flags); if (list_empty(&skt->devices_list)) @@ -835,7 +818,7 @@ static int pcmcia_load_firmware(struct pcmcia_device *dev, char * filename) if (!filename) return -EINVAL; - ds_dev_dbg(1, &dev->dev, "trying to load CIS file %s\n", filename); + dev_dbg(&dev->dev, "trying to load CIS file %s\n", filename); if (request_firmware(&fw, filename, &dev->dev) == 0) { if (fw->size >= CISTPL_MAX_CIS_SIZE) { @@ -953,14 +936,14 @@ static inline int pcmcia_devmatch(struct pcmcia_device *dev, * after it has re-checked that there is no possible module * with a prod_id/manf_id/card_id match. */ - ds_dev_dbg(0, &dev->dev, + dev_dbg(&dev->dev, "skipping FUNC_ID match until userspace interaction\n"); if (!dev->allow_func_id_match) return 0; } if (did->match_flags & PCMCIA_DEV_ID_MATCH_FAKE_CIS) { - ds_dev_dbg(0, &dev->dev, "device needs a fake CIS\n"); + dev_dbg(&dev->dev, "device needs a fake CIS\n"); if (!dev->socket->fake_cis) pcmcia_load_firmware(dev, did->cisfile); @@ -992,9 +975,9 @@ static int pcmcia_bus_match(struct device * dev, struct device_driver * drv) { /* match dynamic devices first */ spin_lock(&p_drv->dynids.lock); list_for_each_entry(dynid, &p_drv->dynids.list, node) { - ds_dev_dbg(3, dev, "trying to match to %s\n", drv->name); + dev_dbg(dev, "trying to match to %s\n", drv->name); if (pcmcia_devmatch(p_dev, &dynid->id)) { - ds_dev_dbg(0, dev, "matched to %s\n", drv->name); + dev_dbg(dev, "matched to %s\n", drv->name); spin_unlock(&p_drv->dynids.lock); return 1; } @@ -1004,15 +987,15 @@ static int pcmcia_bus_match(struct device * dev, struct device_driver * drv) { #ifdef CONFIG_PCMCIA_IOCTL /* matching by cardmgr */ if (p_dev->cardmgr == p_drv) { - ds_dev_dbg(0, dev, "cardmgr matched to %s\n", drv->name); + dev_dbg(dev, "cardmgr matched to %s\n", drv->name); return 1; } #endif while (did && did->match_flags) { - ds_dev_dbg(3, dev, "trying to match to %s\n", drv->name); + dev_dbg(dev, "trying to match to %s\n", drv->name); if (pcmcia_devmatch(p_dev, did)) { - ds_dev_dbg(0, dev, "matched to %s\n", drv->name); + dev_dbg(dev, "matched to %s\n", drv->name); return 1; } did++; @@ -1218,7 +1201,7 @@ static int pcmcia_dev_suspend(struct device * dev, pm_message_t state) if (p_dev->suspended) return 0; - ds_dev_dbg(2, dev, "suspending\n"); + dev_dbg(dev, "suspending\n"); if (dev->driver) p_drv = to_pcmcia_drv(dev->driver); @@ -1238,7 +1221,7 @@ static int pcmcia_dev_suspend(struct device * dev, pm_message_t state) } if (p_dev->device_no == p_dev->func) { - ds_dev_dbg(2, dev, "releasing configuration\n"); + dev_dbg(dev, "releasing configuration\n"); pcmcia_release_configuration(p_dev); } @@ -1258,7 +1241,7 @@ static int pcmcia_dev_resume(struct device * dev) if (!p_dev->suspended) return 0; - ds_dev_dbg(2, dev, "resuming\n"); + dev_dbg(dev, "resuming\n"); if (dev->driver) p_drv = to_pcmcia_drv(dev->driver); @@ -1267,7 +1250,7 @@ static int pcmcia_dev_resume(struct device * dev) goto out; if (p_dev->device_no == p_dev->func) { - ds_dev_dbg(2, dev, "requesting configuration\n"); + dev_dbg(dev, "requesting configuration\n"); ret = pcmcia_request_configuration(p_dev, &p_dev->conf); if (ret) goto out; @@ -1309,14 +1292,14 @@ static int pcmcia_bus_resume_callback(struct device *dev, void * _data) static int pcmcia_bus_resume(struct pcmcia_socket *skt) { - ds_dev_dbg(2, &skt->dev, "resuming socket %d\n", skt->sock); + dev_dbg(&skt->dev, "resuming socket %d\n", skt->sock); bus_for_each_dev(&pcmcia_bus_type, NULL, skt, pcmcia_bus_resume_callback); return 0; } static int pcmcia_bus_suspend(struct pcmcia_socket *skt) { - ds_dev_dbg(2, &skt->dev, "suspending socket %d\n", skt->sock); + dev_dbg(&skt->dev, "suspending socket %d\n", skt->sock); if (bus_for_each_dev(&pcmcia_bus_type, NULL, skt, pcmcia_bus_suspend_callback)) { pcmcia_bus_resume(skt); @@ -1348,7 +1331,7 @@ static int ds_event(struct pcmcia_socket *skt, event_t event, int priority) return -ENODEV; } - ds_dev_dbg(1, &skt->dev, "ds_event(0x%06x, %d, 0x%p)\n", + dev_dbg(&skt->dev, "ds_event(0x%06x, %d, 0x%p)\n", event, priority, skt); switch (event) { diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 30cf71d..056fd13 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -58,17 +58,6 @@ typedef struct user_info_t { } user_info_t; -#ifdef CONFIG_PCMCIA_DEBUG -extern int ds_pc_debug; - -#define ds_dbg(lvl, fmt, arg...) do { \ - if (ds_pc_debug >= lvl) \ - printk(KERN_DEBUG "ds: " fmt , ## arg); \ -} while (0) -#else -#define ds_dbg(lvl, fmt, arg...) do { } while (0) -#endif - static struct pcmcia_device *get_pcmcia_device(struct pcmcia_socket *s, unsigned int function) { @@ -431,7 +420,7 @@ static int bind_request(struct pcmcia_socket *s, bind_info_t *bind_info) if (!s) return -EINVAL; - ds_dbg(2, "bind_request(%d, '%s')\n", s->sock, + pr_debug("bind_request(%d, '%s')\n", s->sock, (char *)bind_info->dev_info); p_drv = get_pcmcia_driver(&bind_info->dev_info); @@ -623,7 +612,7 @@ static int ds_open(struct inode *inode, struct file *file) static int warning_printed = 0; int ret = 0; - ds_dbg(0, "ds_open(socket %d)\n", i); + pr_debug("ds_open(socket %d)\n", i); lock_kernel(); s = pcmcia_get_socket_by_nr(i); @@ -685,7 +674,7 @@ static int ds_release(struct inode *inode, struct file *file) struct pcmcia_socket *s; user_info_t *user, **link; - ds_dbg(0, "ds_release(socket %d)\n", iminor(inode)); + pr_debug("ds_release(socket %d)\n", iminor(inode)); user = file->private_data; if (CHECK_USER(user)) @@ -719,7 +708,7 @@ static ssize_t ds_read(struct file *file, char __user *buf, user_info_t *user; int ret; - ds_dbg(2, "ds_read(socket %d)\n", iminor(file->f_path.dentry->d_inode)); + pr_debug("ds_read(socket %d)\n", iminor(file->f_path.dentry->d_inode)); if (count < 4) return -EINVAL; @@ -744,7 +733,7 @@ static ssize_t ds_read(struct file *file, char __user *buf, static ssize_t ds_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - ds_dbg(2, "ds_write(socket %d)\n", iminor(file->f_path.dentry->d_inode)); + pr_debug("ds_write(socket %d)\n", iminor(file->f_path.dentry->d_inode)); if (count != 4) return -EINVAL; @@ -762,7 +751,7 @@ static u_int ds_poll(struct file *file, poll_table *wait) struct pcmcia_socket *s; user_info_t *user; - ds_dbg(2, "ds_poll(socket %d)\n", iminor(file->f_path.dentry->d_inode)); + pr_debug("ds_poll(socket %d)\n", iminor(file->f_path.dentry->d_inode)); user = file->private_data; if (CHECK_USER(user)) @@ -790,7 +779,7 @@ static int ds_ioctl(struct inode * inode, struct file * file, ds_ioctl_arg_t *buf; user_info_t *user; - ds_dbg(2, "ds_ioctl(socket %d, %#x, %#lx)\n", iminor(inode), cmd, arg); + pr_debug("ds_ioctl(socket %d, %#x, %#lx)\n", iminor(inode), cmd, arg); user = file->private_data; if (CHECK_USER(user)) @@ -809,13 +798,13 @@ static int ds_ioctl(struct inode * inode, struct file * file, if (cmd & IOC_IN) { if (!access_ok(VERIFY_READ, uarg, size)) { - ds_dbg(3, "ds_ioctl(): verify_read = %d\n", -EFAULT); + pr_debug("ds_ioctl(): verify_read = %d\n", -EFAULT); return -EFAULT; } } if (cmd & IOC_OUT) { if (!access_ok(VERIFY_WRITE, uarg, size)) { - ds_dbg(3, "ds_ioctl(): verify_write = %d\n", -EFAULT); + pr_debug("ds_ioctl(): verify_write = %d\n", -EFAULT); return -EFAULT; } } @@ -962,7 +951,7 @@ static int ds_ioctl(struct inode * inode, struct file * file, } if ((err == 0) && (ret != 0)) { - ds_dbg(2, "ds_ioctl: ret = %d\n", ret); + pr_debug("ds_ioctl: ret = %d\n", ret); switch (ret) { case -ENODEV: case -EINVAL: diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 0bfb05a..349bc56 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -44,21 +44,6 @@ static u8 pcmcia_used_irq[NR_IRQS]; #endif -#ifdef CONFIG_PCMCIA_DEBUG -extern int ds_pc_debug; - -#define ds_dbg(skt, lvl, fmt, arg...) do { \ - if (ds_pc_debug >= lvl) \ - dev_printk(KERN_DEBUG, &skt->dev, \ - "pcmcia_resource: " fmt, \ - ## arg); \ -} while (0) -#else -#define ds_dbg(skt, lvl, fmt, arg...) do { } while (0) -#endif - - - /** alloc_io_space * * Special stuff for managing IO windows, because they are scarce @@ -73,14 +58,14 @@ static int alloc_io_space(struct pcmcia_socket *s, u_int attr, align = (*base) ? (lines ? 1<dev, "odd IO request: num %#x align %#x\n", num, align); align = 0; } else while (align && (align < num)) align <<= 1; } if (*base & ~(align-1)) { - ds_dbg(s, 0, "odd IO request: base %#x align %#x\n", + dev_dbg(&s->dev, "odd IO request: base %#x align %#x\n", *base, align); align = 0; } @@ -253,12 +238,12 @@ int pcmcia_map_mem_page(window_handle_t win, memreq_t *req) return -EINVAL; s = win->sock; if (req->Page != 0) { - ds_dbg(s, 0, "failure: requested page is zero\n"); + dev_dbg(&s->dev, "failure: requested page is zero\n"); return -EINVAL; } win->ctl.card_start = req->CardOffset; if (s->ops->set_mem_map(s, &win->ctl) != 0) { - ds_dbg(s, 0, "failed to set_mem_map\n"); + dev_dbg(&s->dev, "failed to set_mem_map\n"); return -EIO; } return 0; @@ -296,7 +281,7 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, } if (mod->Attributes & CONF_VCC_CHANGE_VALID) { - ds_dbg(s, 0, "changing Vcc is not allowed at this time\n"); + dev_dbg(&s->dev, "changing Vcc is not allowed at this time\n"); return -EINVAL; } @@ -304,7 +289,7 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, if ((mod->Attributes & CONF_VPP1_CHANGE_VALID) && (mod->Attributes & CONF_VPP2_CHANGE_VALID)) { if (mod->Vpp1 != mod->Vpp2) { - ds_dbg(s, 0, "Vpp1 and Vpp2 must be the same\n"); + dev_dbg(&s->dev, "Vpp1 and Vpp2 must be the same\n"); return -EINVAL; } s->socket.Vpp = mod->Vpp1; @@ -315,7 +300,7 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, } } else if ((mod->Attributes & CONF_VPP1_CHANGE_VALID) || (mod->Attributes & CONF_VPP2_CHANGE_VALID)) { - ds_dbg(s, 0, "changing Vcc is not allowed at this time\n"); + dev_dbg(&s->dev, "changing Vcc is not allowed at this time\n"); return -EINVAL; } @@ -426,11 +411,11 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) if (c->state & CONFIG_LOCKED) return -EACCES; if (c->irq.Attributes != req->Attributes) { - ds_dbg(s, 0, "IRQ attributes must match assigned ones\n"); + dev_dbg(&s->dev, "IRQ attributes must match assigned ones\n"); return -EINVAL; } if (s->irq.AssignedIRQ != req->AssignedIRQ) { - ds_dbg(s, 0, "IRQ must match assigned one\n"); + dev_dbg(&s->dev, "IRQ must match assigned one\n"); return -EINVAL; } if (--s->irq.Config == 0) { @@ -493,7 +478,7 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, return -ENODEV; if (req->IntType & INT_CARDBUS) { - ds_dbg(p_dev->socket, 0, "IntType may not be INT_CARDBUS\n"); + dev_dbg(&s->dev, "IntType may not be INT_CARDBUS\n"); return -EINVAL; } c = p_dev->function_config; @@ -619,31 +604,31 @@ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req) if (c->state & CONFIG_LOCKED) return -EACCES; if (c->state & CONFIG_IO_REQ) { - ds_dbg(s, 0, "IO already configured\n"); + dev_dbg(&s->dev, "IO already configured\n"); return -EBUSY; } if (req->Attributes1 & (IO_SHARED | IO_FORCE_ALIAS_ACCESS)) { - ds_dbg(s, 0, "bad attribute setting for IO region 1\n"); + dev_dbg(&s->dev, "bad attribute setting for IO region 1\n"); return -EINVAL; } if ((req->NumPorts2 > 0) && (req->Attributes2 & (IO_SHARED | IO_FORCE_ALIAS_ACCESS))) { - ds_dbg(s, 0, "bad attribute setting for IO region 2\n"); + dev_dbg(&s->dev, "bad attribute setting for IO region 2\n"); return -EINVAL; } - ds_dbg(s, 1, "trying to allocate resource 1\n"); + dev_dbg(&s->dev, "trying to allocate resource 1\n"); if (alloc_io_space(s, req->Attributes1, &req->BasePort1, req->NumPorts1, req->IOAddrLines)) { - ds_dbg(s, 0, "allocation of resource 1 failed\n"); + dev_dbg(&s->dev, "allocation of resource 1 failed\n"); return -EBUSY; } if (req->NumPorts2) { - ds_dbg(s, 1, "trying to allocate resource 2\n"); + dev_dbg(&s->dev, "trying to allocate resource 2\n"); if (alloc_io_space(s, req->Attributes2, &req->BasePort2, req->NumPorts2, req->IOAddrLines)) { - ds_dbg(s, 0, "allocation of resource 2 failed\n"); + dev_dbg(&s->dev, "allocation of resource 2 failed\n"); release_io_space(s, req->BasePort1, req->NumPorts1); return -EBUSY; } @@ -687,7 +672,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) if (c->state & CONFIG_LOCKED) return -EACCES; if (c->state & CONFIG_IRQ_REQ) { - ds_dbg(s, 0, "IRQ already configured\n"); + dev_dbg(&s->dev, "IRQ already configured\n"); return -EBUSY; } @@ -798,7 +783,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h if (!(s->state & SOCKET_PRESENT)) return -ENODEV; if (req->Attributes & (WIN_PAGED | WIN_SHARED)) { - ds_dbg(s, 0, "bad attribute setting for iomem region\n"); + dev_dbg(&s->dev, "bad attribute setting for iomem region\n"); return -EINVAL; } @@ -809,12 +794,12 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h (req->Attributes & WIN_STRICT_ALIGN)) ? req->Size : s->map_size); if (req->Size & (s->map_size-1)) { - ds_dbg(s, 0, "invalid map size\n"); + dev_dbg(&s->dev, "invalid map size\n"); return -EINVAL; } if ((req->Base && (s->features & SS_CAP_STATIC_MAP)) || (req->Base & (align-1))) { - ds_dbg(s, 0, "invalid base address\n"); + dev_dbg(&s->dev, "invalid base address\n"); return -EINVAL; } if (req->Base) @@ -824,7 +809,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h for (w = 0; w < MAX_WIN; w++) if (!(s->state & SOCKET_WIN_REQ(w))) break; if (w == MAX_WIN) { - ds_dbg(s, 0, "all windows are used already\n"); + dev_dbg(&s->dev, "all windows are used already\n"); return -EINVAL; } @@ -838,7 +823,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h win->ctl.res = pcmcia_find_mem_region(req->Base, req->Size, align, (req->Attributes & WIN_MAP_BELOW_1MB), s); if (!win->ctl.res) { - ds_dbg(s, 0, "allocating mem region failed\n"); + dev_dbg(&s->dev, "allocating mem region failed\n"); return -EINVAL; } } @@ -858,7 +843,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h win->ctl.flags |= MAP_USE_WAIT; win->ctl.card_start = 0; if (s->ops->set_mem_map(s, &win->ctl) != 0) { - ds_dbg(s, 0, "failed to set memory mapping\n"); + dev_dbg(&s->dev, "failed to set memory mapping\n"); return -EIO; } s->state |= SOCKET_WIN_REQ(w); -- cgit v0.10.2 From c9f50dddd184a020d64dab63fa795967f0f14aa4 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 23 Oct 2009 12:56:46 +0200 Subject: pcmcia: use dynamic debug in PCMCIA socket drivers Make use of the dynamic debug infrastructure in various PCMCIA socket drivers. By doing so, only the drivers relying on soc_common make use of CONFIG_PCMCIA_DEBUG. Therefore, update the Kconfig entry accordingly. Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index 17f38a7..0c44ddd 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -17,24 +17,6 @@ menuconfig PCCARD if PCCARD -config PCMCIA_DEBUG - bool "Enable PCCARD debugging" - help - Say Y here to enable PCMCIA subsystem debugging. You - will need to choose the debugging level either via the - kernel command line, or module options depending whether - you build the PCMCIA as modules. - - The kernel command line options are: - pcmcia_core.pc_debug=N - pcmcia.pc_debug=N - sa11xx_core.pc_debug=N - - The module option is called pc_debug=N - - In all the above examples, N is the debugging verbosity - level. - config PCMCIA tristate "16-bit PCMCIA support" select CRC32 @@ -225,6 +207,24 @@ config PCMCIA_PXA2XX help Say Y here to include support for the PXA2xx PCMCIA controller +config PCMCIA_DEBUG + bool "Enable debugging" + depends on (PCMCIA_SA1111 || PCMCIA_SA1100 || PCMCIA_PXA2XX) + help + Say Y here to enable debugging for the SoC PCMCIA layer. + You will need to choose the debugging level either via the + kernel command line, or module options depending whether + you build the drivers as modules. + + The kernel command line options are: + sa11xx_core.pc_debug=N + pxa2xx_core.pc_debug=N + + The module option is called pc_debug=N + + In all the above examples, N is the debugging verbosity + level. + config PCMCIA_PROBE bool default y if ISA && !ARCH_SA1100 && !ARCH_CLPS711X && !PARISC diff --git a/drivers/pcmcia/i82365.c b/drivers/pcmcia/i82365.c index a4aacb8..c13fd93 100644 --- a/drivers/pcmcia/i82365.c +++ b/drivers/pcmcia/i82365.c @@ -63,21 +63,6 @@ #include "vg468.h" #include "ricoh.h" -#ifdef CONFIG_PCMCIA_DEBUG -static const char version[] = -"i82365.c 1.265 1999/11/10 18:36:21 (David Hinds)"; - -static int pc_debug; - -module_param(pc_debug, int, 0644); - -#define debug(lvl, fmt, arg...) do { \ - if (pc_debug > (lvl)) \ - printk(KERN_DEBUG "i82365: " fmt , ## arg); \ -} while (0) -#else -#define debug(lvl, fmt, arg...) do { } while (0) -#endif static irqreturn_t i365_count_irq(int, void *); static inline int _check_irq(int irq, int flags) @@ -501,13 +486,13 @@ static irqreturn_t i365_count_irq(int irq, void *dev) { i365_get(irq_sock, I365_CSC); irq_hits++; - debug(2, "-> hit on irq %d\n", irq); + pr_debug("i82365: -> hit on irq %d\n", irq); return IRQ_HANDLED; } static u_int __init test_irq(u_short sock, int irq) { - debug(2, " testing ISA irq %d\n", irq); + pr_debug("i82365: testing ISA irq %d\n", irq); if (request_irq(irq, i365_count_irq, IRQF_PROBE_SHARED, "scan", i365_count_irq) != 0) return 1; @@ -515,7 +500,7 @@ static u_int __init test_irq(u_short sock, int irq) msleep(10); if (irq_hits) { free_irq(irq, i365_count_irq); - debug(2, " spurious hit!\n"); + pr_debug("i82365: spurious hit!\n"); return 1; } @@ -528,7 +513,7 @@ static u_int __init test_irq(u_short sock, int irq) /* mask all interrupts */ i365_set(sock, I365_CSCINT, 0); - debug(2, " hits = %d\n", irq_hits); + pr_debug("i82365: hits = %d\n", irq_hits); return (irq_hits != 1); } @@ -854,7 +839,7 @@ static irqreturn_t pcic_interrupt(int irq, void *dev) u_long flags = 0; int handled = 0; - debug(4, "pcic_interrupt(%d)\n", irq); + pr_debug("pcic_interrupt(%d)\n", irq); for (j = 0; j < 20; j++) { active = 0; @@ -878,7 +863,7 @@ static irqreturn_t pcic_interrupt(int irq, void *dev) events |= (csc & I365_CSC_READY) ? SS_READY : 0; } ISA_UNLOCK(i, flags); - debug(2, "socket %d event 0x%02x\n", i, events); + pr_debug("socket %d event 0x%02x\n", i, events); if (events) pcmcia_parse_events(&socket[i].socket, events); @@ -890,7 +875,7 @@ static irqreturn_t pcic_interrupt(int irq, void *dev) if (j == 20) printk(KERN_NOTICE "i82365: infinite loop in interrupt handler\n"); - debug(4, "interrupt done\n"); + pr_debug("pcic_interrupt done\n"); return IRQ_RETVAL(handled); } /* pcic_interrupt */ @@ -932,7 +917,7 @@ static int i365_get_status(u_short sock, u_int *value) } } - debug(1, "GetStatus(%d) = %#4.4x\n", sock, *value); + pr_debug("GetStatus(%d) = %#4.4x\n", sock, *value); return 0; } /* i365_get_status */ @@ -943,7 +928,7 @@ static int i365_set_socket(u_short sock, socket_state_t *state) struct i82365_socket *t = &socket[sock]; u_char reg; - debug(1, "SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + pr_debug("SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)\n", sock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); @@ -1052,7 +1037,7 @@ static int i365_set_io_map(u_short sock, struct pccard_io_map *io) { u_char map, ioctl; - debug(1, "SetIOMap(%d, %d, %#2.2x, %d ns, " + pr_debug("SetIOMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx)\n", sock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); map = io->map; @@ -1082,7 +1067,7 @@ static int i365_set_mem_map(u_short sock, struct pccard_mem_map *mem) u_short base, i; u_char map; - debug(1, "SetMemMap(%d, %d, %#2.2x, %d ns, %#llx-%#llx, " + pr_debug("SetMemMap(%d, %d, %#2.2x, %d ns, %#llx-%#llx, " "%#x)\n", sock, mem->map, mem->flags, mem->speed, (unsigned long long)mem->res->start, (unsigned long long)mem->res->end, mem->card_start); diff --git a/drivers/pcmcia/m32r_cfc.c b/drivers/pcmcia/m32r_cfc.c index 7dfbee1..26a621c 100644 --- a/drivers/pcmcia/m32r_cfc.c +++ b/drivers/pcmcia/m32r_cfc.c @@ -38,17 +38,6 @@ #include "m32r_cfc.h" -#ifdef CONFIG_PCMCIA_DEBUG -static int m32r_cfc_debug; -module_param(m32r_cfc_debug, int, 0644); -#define debug(lvl, fmt, arg...) do { \ - if (m32r_cfc_debug > (lvl)) \ - printk(KERN_DEBUG "m32r_cfc: " fmt , ## arg); \ -} while (0) -#else -#define debug(n, args...) do { } while (0) -#endif - /* Poll status interval -- 0 means default to interrupt */ static int poll_interval = 0; @@ -123,7 +112,7 @@ void pcc_ioread_byte(int sock, unsigned long port, void *buf, size_t size, unsigned char *bp = (unsigned char *)buf; unsigned long flags; - debug(3, "m32r_cfc: pcc_ioread_byte: sock=%d, port=%#lx, buf=%p, " + pr_debug("m32r_cfc: pcc_ioread_byte: sock=%d, port=%#lx, buf=%p, " "size=%u, nmemb=%d, flag=%d\n", sock, port, buf, size, nmemb, flag); @@ -132,7 +121,7 @@ void pcc_ioread_byte(int sock, unsigned long port, void *buf, size_t size, printk("m32r_cfc:ioread_byte null port :%#lx\n",port); return; } - debug(3, "m32r_cfc: pcc_ioread_byte: addr=%#lx\n", addr); + pr_debug("m32r_cfc: pcc_ioread_byte: addr=%#lx\n", addr); spin_lock_irqsave(&pcc_lock, flags); /* read Byte */ @@ -148,7 +137,7 @@ void pcc_ioread_word(int sock, unsigned long port, void *buf, size_t size, unsigned short *bp = (unsigned short *)buf; unsigned long flags; - debug(3, "m32r_cfc: pcc_ioread_word: sock=%d, port=%#lx, " + pr_debug("m32r_cfc: pcc_ioread_word: sock=%d, port=%#lx, " "buf=%p, size=%u, nmemb=%d, flag=%d\n", sock, port, buf, size, nmemb, flag); @@ -163,7 +152,7 @@ void pcc_ioread_word(int sock, unsigned long port, void *buf, size_t size, printk("m32r_cfc:ioread_word null port :%#lx\n",port); return; } - debug(3, "m32r_cfc: pcc_ioread_word: addr=%#lx\n", addr); + pr_debug("m32r_cfc: pcc_ioread_word: addr=%#lx\n", addr); spin_lock_irqsave(&pcc_lock, flags); /* read Word */ @@ -179,7 +168,7 @@ void pcc_iowrite_byte(int sock, unsigned long port, void *buf, size_t size, unsigned char *bp = (unsigned char *)buf; unsigned long flags; - debug(3, "m32r_cfc: pcc_iowrite_byte: sock=%d, port=%#lx, " + pr_debug("m32r_cfc: pcc_iowrite_byte: sock=%d, port=%#lx, " "buf=%p, size=%u, nmemb=%d, flag=%d\n", sock, port, buf, size, nmemb, flag); @@ -189,7 +178,7 @@ void pcc_iowrite_byte(int sock, unsigned long port, void *buf, size_t size, printk("m32r_cfc:iowrite_byte null port:%#lx\n",port); return; } - debug(3, "m32r_cfc: pcc_iowrite_byte: addr=%#lx\n", addr); + pr_debug("m32r_cfc: pcc_iowrite_byte: addr=%#lx\n", addr); spin_lock_irqsave(&pcc_lock, flags); while (nmemb--) @@ -204,7 +193,7 @@ void pcc_iowrite_word(int sock, unsigned long port, void *buf, size_t size, unsigned short *bp = (unsigned short *)buf; unsigned long flags; - debug(3, "m32r_cfc: pcc_iowrite_word: sock=%d, port=%#lx, " + pr_debug("m32r_cfc: pcc_iowrite_word: sock=%d, port=%#lx, " "buf=%p, size=%u, nmemb=%d, flag=%d\n", sock, port, buf, size, nmemb, flag); @@ -226,7 +215,7 @@ void pcc_iowrite_word(int sock, unsigned long port, void *buf, size_t size, return; } #endif - debug(3, "m32r_cfc: pcc_iowrite_word: addr=%#lx\n", addr); + pr_debug("m32r_cfc: pcc_iowrite_word: addr=%#lx\n", addr); spin_lock_irqsave(&pcc_lock, flags); while (nmemb--) @@ -262,7 +251,7 @@ static struct timer_list poll_timer; static unsigned int pcc_get(u_short sock, unsigned int reg) { unsigned int val = inw(reg); - debug(3, "m32r_cfc: pcc_get: reg(0x%08x)=0x%04x\n", reg, val); + pr_debug("m32r_cfc: pcc_get: reg(0x%08x)=0x%04x\n", reg, val); return val; } @@ -270,7 +259,7 @@ static unsigned int pcc_get(u_short sock, unsigned int reg) static void pcc_set(u_short sock, unsigned int reg, unsigned int data) { outw(data, reg); - debug(3, "m32r_cfc: pcc_set: reg(0x%08x)=0x%04x\n", reg, data); + pr_debug("m32r_cfc: pcc_set: reg(0x%08x)=0x%04x\n", reg, data); } /*====================================================================== @@ -286,14 +275,14 @@ static int __init is_alive(u_short sock) { unsigned int stat; - debug(3, "m32r_cfc: is_alive:\n"); + pr_debug("m32r_cfc: is_alive:\n"); printk("CF: "); stat = pcc_get(sock, (unsigned int)PLD_CFSTS); if (!stat) printk("No "); printk("Card is detected at socket %d : stat = 0x%08x\n", sock, stat); - debug(3, "m32r_cfc: is_alive: sock stat is 0x%04x\n", stat); + pr_debug("m32r_cfc: is_alive: sock stat is 0x%04x\n", stat); return 0; } @@ -303,7 +292,7 @@ static void add_pcc_socket(ulong base, int irq, ulong mapaddr, { pcc_socket_t *t = &socket[pcc_sockets]; - debug(3, "m32r_cfc: add_pcc_socket: base=%#lx, irq=%d, " + pr_debug("m32r_cfc: add_pcc_socket: base=%#lx, irq=%d, " "mapaddr=%#lx, ioaddr=%08x\n", base, irq, mapaddr, ioaddr); @@ -358,7 +347,7 @@ static void add_pcc_socket(ulong base, int irq, ulong mapaddr, /* eject interrupt */ request_irq(irq+1, pcc_interrupt, 0, "m32r_cfc", pcc_interrupt); #endif - debug(3, "m32r_cfc: enable CFMSK, RDYSEL\n"); + pr_debug("m32r_cfc: enable CFMSK, RDYSEL\n"); pcc_set(pcc_sockets, (unsigned int)PLD_CFIMASK, 0x01); #endif /* CONFIG_PLAT_USRV */ #if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_USRV) || defined(CONFIG_PLAT_OPSPUT) @@ -378,26 +367,26 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) u_int events = 0; int handled = 0; - debug(3, "m32r_cfc: pcc_interrupt: irq=%d, dev=%p\n", irq, dev); + pr_debug("m32r_cfc: pcc_interrupt: irq=%d, dev=%p\n", irq, dev); for (i = 0; i < pcc_sockets; i++) { if (socket[i].cs_irq1 != irq && socket[i].cs_irq2 != irq) continue; handled = 1; - debug(3, "m32r_cfc: pcc_interrupt: socket %d irq 0x%02x ", + pr_debug("m32r_cfc: pcc_interrupt: socket %d irq 0x%02x ", i, irq); events |= SS_DETECT; /* insert or eject */ if (events) pcmcia_parse_events(&socket[i].socket, events); } - debug(3, "m32r_cfc: pcc_interrupt: done\n"); + pr_debug("m32r_cfc: pcc_interrupt: done\n"); return IRQ_RETVAL(handled); } /* pcc_interrupt */ static void pcc_interrupt_wrapper(u_long data) { - debug(3, "m32r_cfc: pcc_interrupt_wrapper:\n"); + pr_debug("m32r_cfc: pcc_interrupt_wrapper:\n"); pcc_interrupt(0, NULL); init_timer(&poll_timer); poll_timer.expires = jiffies + poll_interval; @@ -410,17 +399,17 @@ static int _pcc_get_status(u_short sock, u_int *value) { u_int status; - debug(3, "m32r_cfc: _pcc_get_status:\n"); + pr_debug("m32r_cfc: _pcc_get_status:\n"); status = pcc_get(sock, (unsigned int)PLD_CFSTS); *value = (status) ? SS_DETECT : 0; - debug(3, "m32r_cfc: _pcc_get_status: status=0x%08x\n", status); + pr_debug("m32r_cfc: _pcc_get_status: status=0x%08x\n", status); #if defined(CONFIG_PLAT_M32700UT) || defined(CONFIG_PLAT_USRV) || defined(CONFIG_PLAT_OPSPUT) if ( status ) { /* enable CF power */ status = inw((unsigned int)PLD_CPCR); if (!(status & PLD_CPCR_CF)) { - debug(3, "m32r_cfc: _pcc_get_status: " + pr_debug("m32r_cfc: _pcc_get_status: " "power on (CPCR=0x%08x)\n", status); status |= PLD_CPCR_CF; outw(status, (unsigned int)PLD_CPCR); @@ -439,7 +428,7 @@ static int _pcc_get_status(u_short sock, u_int *value) status &= ~PLD_CPCR_CF; outw(status, (unsigned int)PLD_CPCR); udelay(100); - debug(3, "m32r_cfc: _pcc_get_status: " + pr_debug("m32r_cfc: _pcc_get_status: " "power off (CPCR=0x%08x)\n", status); } #elif defined(CONFIG_PLAT_MAPPI2) || defined(CONFIG_PLAT_MAPPI3) @@ -465,13 +454,13 @@ static int _pcc_get_status(u_short sock, u_int *value) /* disable CF power */ pcc_set(sock, (unsigned int)PLD_CPCR, 0); udelay(100); - debug(3, "m32r_cfc: _pcc_get_status: " + pr_debug("m32r_cfc: _pcc_get_status: " "power off (CPCR=0x%08x)\n", status); } #else #error no platform configuration #endif - debug(3, "m32r_cfc: _pcc_get_status: GetStatus(%d) = %#4.4x\n", + pr_debug("m32r_cfc: _pcc_get_status: GetStatus(%d) = %#4.4x\n", sock, *value); return 0; } /* _get_status */ @@ -480,7 +469,7 @@ static int _pcc_get_status(u_short sock, u_int *value) static int _pcc_set_socket(u_short sock, socket_state_t *state) { - debug(3, "m32r_cfc: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + pr_debug("m32r_cfc: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)\n", sock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); @@ -492,41 +481,39 @@ static int _pcc_set_socket(u_short sock, socket_state_t *state) } #endif if (state->flags & SS_RESET) { - debug(3, ":RESET\n"); + pr_debug(":RESET\n"); pcc_set(sock,(unsigned int)PLD_CFRSTCR,0x101); }else{ pcc_set(sock,(unsigned int)PLD_CFRSTCR,0x100); } if (state->flags & SS_OUTPUT_ENA){ - debug(3, ":OUTPUT_ENA\n"); + pr_debug(":OUTPUT_ENA\n"); /* bit clear */ pcc_set(sock,(unsigned int)PLD_CFBUFCR,0); } else { pcc_set(sock,(unsigned int)PLD_CFBUFCR,1); } -#ifdef CONFIG_PCMCIA_DEBUG if(state->flags & SS_IOCARD){ - debug(3, ":IOCARD"); + pr_debug(":IOCARD"); } if (state->flags & SS_PWR_AUTO) { - debug(3, ":PWR_AUTO"); + pr_debug(":PWR_AUTO"); } if (state->csc_mask & SS_DETECT) - debug(3, ":csc-SS_DETECT"); + pr_debug(":csc-SS_DETECT"); if (state->flags & SS_IOCARD) { if (state->csc_mask & SS_STSCHG) - debug(3, ":STSCHG"); + pr_debug(":STSCHG"); } else { if (state->csc_mask & SS_BATDEAD) - debug(3, ":BATDEAD"); + pr_debug(":BATDEAD"); if (state->csc_mask & SS_BATWARN) - debug(3, ":BATWARN"); + pr_debug(":BATWARN"); if (state->csc_mask & SS_READY) - debug(3, ":READY"); + pr_debug(":READY"); } - debug(3, "\n"); -#endif + pr_debug("\n"); return 0; } /* _set_socket */ @@ -536,7 +523,7 @@ static int _pcc_set_io_map(u_short sock, struct pccard_io_map *io) { u_char map; - debug(3, "m32r_cfc: SetIOMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m32r_cfc: SetIOMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx)\n", sock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); @@ -554,7 +541,7 @@ static int _pcc_set_mem_map(u_short sock, struct pccard_mem_map *mem) u_long addr; pcc_socket_t *t = &socket[sock]; - debug(3, "m32r_cfc: SetMemMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m32r_cfc: SetMemMap(%d, %d, %#2.2x, %d ns, " "%#llx, %#x)\n", sock, map, mem->flags, mem->speed, (unsigned long long)mem->static_start, mem->card_start); @@ -640,11 +627,11 @@ static int pcc_get_status(struct pcmcia_socket *s, u_int *value) unsigned int sock = container_of(s, struct pcc_socket, socket)->number; if (socket[sock].flags & IS_ALIVE) { - debug(3, "m32r_cfc: pcc_get_status: sock(%d) -EINVAL\n", sock); + dev_dbg(&s->dev, "pcc_get_status: sock(%d) -EINVAL\n", sock); *value = 0; return -EINVAL; } - debug(3, "m32r_cfc: pcc_get_status: sock(%d)\n", sock); + dev_dbg(&s->dev, "pcc_get_status: sock(%d)\n", sock); LOCKED(_pcc_get_status(sock, value)); } @@ -653,10 +640,10 @@ static int pcc_set_socket(struct pcmcia_socket *s, socket_state_t *state) unsigned int sock = container_of(s, struct pcc_socket, socket)->number; if (socket[sock].flags & IS_ALIVE) { - debug(3, "m32r_cfc: pcc_set_socket: sock(%d) -EINVAL\n", sock); + dev_dbg(&s->dev, "pcc_set_socket: sock(%d) -EINVAL\n", sock); return -EINVAL; } - debug(3, "m32r_cfc: pcc_set_socket: sock(%d)\n", sock); + dev_dbg(&s->dev, "pcc_set_socket: sock(%d)\n", sock); LOCKED(_pcc_set_socket(sock, state)); } @@ -665,10 +652,10 @@ static int pcc_set_io_map(struct pcmcia_socket *s, struct pccard_io_map *io) unsigned int sock = container_of(s, struct pcc_socket, socket)->number; if (socket[sock].flags & IS_ALIVE) { - debug(3, "m32r_cfc: pcc_set_io_map: sock(%d) -EINVAL\n", sock); + dev_dbg(&s->dev, "pcc_set_io_map: sock(%d) -EINVAL\n", sock); return -EINVAL; } - debug(3, "m32r_cfc: pcc_set_io_map: sock(%d)\n", sock); + dev_dbg(&s->dev, "pcc_set_io_map: sock(%d)\n", sock); LOCKED(_pcc_set_io_map(sock, io)); } @@ -677,16 +664,16 @@ static int pcc_set_mem_map(struct pcmcia_socket *s, struct pccard_mem_map *mem) unsigned int sock = container_of(s, struct pcc_socket, socket)->number; if (socket[sock].flags & IS_ALIVE) { - debug(3, "m32r_cfc: pcc_set_mem_map: sock(%d) -EINVAL\n", sock); + dev_dbg(&s->dev, "pcc_set_mem_map: sock(%d) -EINVAL\n", sock); return -EINVAL; } - debug(3, "m32r_cfc: pcc_set_mem_map: sock(%d)\n", sock); + dev_dbg(&s->dev, "pcc_set_mem_map: sock(%d)\n", sock); LOCKED(_pcc_set_mem_map(sock, mem)); } static int pcc_init(struct pcmcia_socket *s) { - debug(3, "m32r_cfc: pcc_init()\n"); + dev_dbg(&s->dev, "pcc_init()\n"); return 0; } diff --git a/drivers/pcmcia/m32r_pcc.c b/drivers/pcmcia/m32r_pcc.c index c6524f9..72844c5 100644 --- a/drivers/pcmcia/m32r_pcc.c +++ b/drivers/pcmcia/m32r_pcc.c @@ -45,16 +45,6 @@ #define PCC_DEBUG_DBEX -#ifdef CONFIG_PCMCIA_DEBUG -static int m32r_pcc_debug; -module_param(m32r_pcc_debug, int, 0644); -#define debug(lvl, fmt, arg...) do { \ - if (m32r_pcc_debug > (lvl)) \ - printk(KERN_DEBUG "m32r_pcc: " fmt , ## arg); \ -} while (0) -#else -#define debug(n, args...) do { } while (0) -#endif /* Poll status interval -- 0 means default to interrupt */ static int poll_interval = 0; @@ -358,7 +348,7 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) u_int events, active; int handled = 0; - debug(4, "m32r: pcc_interrupt(%d)\n", irq); + pr_debug("m32r_pcc: pcc_interrupt(%d)\n", irq); for (j = 0; j < 20; j++) { active = 0; @@ -369,13 +359,14 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) handled = 1; irc = pcc_get(i, PCIRC); irc >>=16; - debug(2, "m32r-pcc:interrupt: socket %d pcirc 0x%02x ", i, irc); + pr_debug("m32r_pcc: interrupt: socket %d pcirc 0x%02x ", + i, irc); if (!irc) continue; events = (irc) ? SS_DETECT : 0; events |= (pcc_get(i,PCCR) & PCCR_PCEN) ? SS_READY : 0; - debug(2, " event 0x%02x\n", events); + pr_debug("m32r_pcc: event 0x%02x\n", events); if (events) pcmcia_parse_events(&socket[i].socket, events); @@ -388,7 +379,7 @@ static irqreturn_t pcc_interrupt(int irq, void *dev) if (j == 20) printk(KERN_NOTICE "m32r-pcc: infinite loop in interrupt handler\n"); - debug(4, "m32r-pcc: interrupt done\n"); + pr_debug("m32r_pcc: interrupt done\n"); return IRQ_RETVAL(handled); } /* pcc_interrupt */ @@ -422,7 +413,7 @@ static int _pcc_get_status(u_short sock, u_int *value) status = pcc_get(sock,PCCSIGCR); *value |= (status & PCCSIGCR_VEN) ? SS_POWERON : 0; - debug(3, "m32r-pcc: GetStatus(%d) = %#4.4x\n", sock, *value); + pr_debug("m32r_pcc: GetStatus(%d) = %#4.4x\n", sock, *value); return 0; } /* _get_status */ @@ -432,7 +423,7 @@ static int _pcc_set_socket(u_short sock, socket_state_t *state) { u_long reg = 0; - debug(3, "m32r-pcc: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + pr_debug("m32r_pcc: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)", sock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); @@ -448,11 +439,11 @@ static int _pcc_set_socket(u_short sock, socket_state_t *state) } if (state->flags & SS_RESET) { - debug(3, ":RESET\n"); + pr_debug("m32r_pcc: :RESET\n"); reg |= PCCSIGCR_CRST; } if (state->flags & SS_OUTPUT_ENA){ - debug(3, ":OUTPUT_ENA\n"); + pr_debug("m32r_pcc: :OUTPUT_ENA\n"); /* bit clear */ } else { reg |= PCCSIGCR_SEN; @@ -460,28 +451,26 @@ static int _pcc_set_socket(u_short sock, socket_state_t *state) pcc_set(sock,PCCSIGCR,reg); -#ifdef CONFIG_PCMCIA_DEBUG if(state->flags & SS_IOCARD){ - debug(3, ":IOCARD"); + pr_debug("m32r_pcc: :IOCARD"); } if (state->flags & SS_PWR_AUTO) { - debug(3, ":PWR_AUTO"); + pr_debug("m32r_pcc: :PWR_AUTO"); } if (state->csc_mask & SS_DETECT) - debug(3, ":csc-SS_DETECT"); + pr_debug("m32r_pcc: :csc-SS_DETECT"); if (state->flags & SS_IOCARD) { if (state->csc_mask & SS_STSCHG) - debug(3, ":STSCHG"); + pr_debug("m32r_pcc: :STSCHG"); } else { if (state->csc_mask & SS_BATDEAD) - debug(3, ":BATDEAD"); + pr_debug("m32r_pcc: :BATDEAD"); if (state->csc_mask & SS_BATWARN) - debug(3, ":BATWARN"); + pr_debug("m32r_pcc: :BATWARN"); if (state->csc_mask & SS_READY) - debug(3, ":READY"); + pr_debug("m32r_pcc: :READY"); } - debug(3, "\n"); -#endif + pr_debug("m32r_pcc: \n"); return 0; } /* _set_socket */ @@ -491,7 +480,7 @@ static int _pcc_set_io_map(u_short sock, struct pccard_io_map *io) { u_char map; - debug(3, "m32r-pcc: SetIOMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m32r_pcc: SetIOMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx)\n", sock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); @@ -515,7 +504,7 @@ static int _pcc_set_mem_map(u_short sock, struct pccard_mem_map *mem) #endif #endif - debug(3, "m32r-pcc: SetMemMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m32r_pcc: SetMemMap(%d, %d, %#2.2x, %d ns, " "%#llx, %#x)\n", sock, map, mem->flags, mem->speed, (unsigned long long)mem->static_start, mem->card_start); @@ -662,7 +651,7 @@ static int pcc_set_mem_map(struct pcmcia_socket *s, struct pccard_mem_map *mem) static int pcc_init(struct pcmcia_socket *s) { - debug(4, "m32r-pcc: init call\n"); + pr_debug("m32r_pcc: init call\n"); return 0; } diff --git a/drivers/pcmcia/m8xx_pcmcia.c b/drivers/pcmcia/m8xx_pcmcia.c index 403559b..7f79c4e 100644 --- a/drivers/pcmcia/m8xx_pcmcia.c +++ b/drivers/pcmcia/m8xx_pcmcia.c @@ -64,14 +64,6 @@ #include #include -#ifdef CONFIG_PCMCIA_DEBUG -static int pc_debug; -module_param(pc_debug, int, 0); -#define dprintk(args...) printk(KERN_DEBUG "m8xx_pcmcia: " args); -#else -#define dprintk(args...) -#endif - #define pcmcia_info(args...) printk(KERN_INFO "m8xx_pcmcia: "args) #define pcmcia_error(args...) printk(KERN_ERR "m8xx_pcmcia: "args) @@ -565,7 +557,7 @@ static irqreturn_t m8xx_interrupt(int irq, void *dev) unsigned int i, events, pscr, pipr, per; pcmconf8xx_t *pcmcia = socket[0].pcmcia; - dprintk("Interrupt!\n"); + pr_debug("m8xx_pcmcia: Interrupt!\n"); /* get interrupt sources */ pscr = in_be32(&pcmcia->pcmc_pscr); @@ -614,7 +606,7 @@ static irqreturn_t m8xx_interrupt(int irq, void *dev) /* call the handler */ - dprintk("slot %u: events = 0x%02x, pscr = 0x%08x, " + pr_debug("m8xx_pcmcia: slot %u: events = 0x%02x, pscr = 0x%08x, " "pipr = 0x%08x\n", i, events, pscr, pipr); if (events) { @@ -641,7 +633,7 @@ static irqreturn_t m8xx_interrupt(int irq, void *dev) /* clear the interrupt sources */ out_be32(&pcmcia->pcmc_pscr, pscr); - dprintk("Interrupt done.\n"); + pr_debug("m8xx_pcmcia: Interrupt done.\n"); return IRQ_HANDLED; } @@ -815,7 +807,7 @@ static int m8xx_get_status(struct pcmcia_socket *sock, unsigned int *value) }; } - dprintk("GetStatus(%d) = %#2.2x\n", lsock, *value); + pr_debug("m8xx_pcmcia: GetStatus(%d) = %#2.2x\n", lsock, *value); return 0; } @@ -828,7 +820,7 @@ static int m8xx_set_socket(struct pcmcia_socket *sock, socket_state_t * state) unsigned long flags; pcmconf8xx_t *pcmcia = socket[0].pcmcia; - dprintk("SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + pr_debug("m8xx_pcmcia: SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)\n", lsock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); @@ -974,7 +966,7 @@ static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) #define M8XX_SIZE (io->stop - io->start + 1) #define M8XX_BASE (PCMCIA_IO_WIN_BASE + io->start) - dprintk("SetIOMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m8xx_pcmcia: SetIOMap(%d, %d, %#2.2x, %d ns, " "%#4.4llx-%#4.4llx)\n", lsock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); @@ -988,7 +980,7 @@ static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) if (io->flags & MAP_ACTIVE) { - dprintk("io->flags & MAP_ACTIVE\n"); + pr_debug("m8xx_pcmcia: io->flags & MAP_ACTIVE\n"); winnr = (PCMCIA_MEM_WIN_NO * PCMCIA_SOCKETS_NO) + (lsock * PCMCIA_IO_WIN_NO) + io->map; @@ -1018,8 +1010,8 @@ static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) out_be32(&w->or, reg); - dprintk("Socket %u: Mapped io window %u at %#8.8x, " - "OR = %#8.8x.\n", lsock, io->map, w->br, w->or); + pr_debug("m8xx_pcmcia: Socket %u: Mapped io window %u at " + "%#8.8x, OR = %#8.8x.\n", lsock, io->map, w->br, w->or); } else { /* shutdown IO window */ winnr = (PCMCIA_MEM_WIN_NO * PCMCIA_SOCKETS_NO) @@ -1033,14 +1025,14 @@ static int m8xx_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) out_be32(&w->or, 0); /* turn off window */ out_be32(&w->br, 0); /* turn off base address */ - dprintk("Socket %u: Unmapped io window %u at %#8.8x, " - "OR = %#8.8x.\n", lsock, io->map, w->br, w->or); + pr_debug("m8xx_pcmcia: Socket %u: Unmapped io window %u at " + "%#8.8x, OR = %#8.8x.\n", lsock, io->map, w->br, w->or); } /* copy the struct and modify the copy */ s->io_win[io->map] = *io; s->io_win[io->map].flags &= (MAP_WRPROT | MAP_16BIT | MAP_ACTIVE); - dprintk("SetIOMap exit\n"); + pr_debug("m8xx_pcmcia: SetIOMap exit\n"); return 0; } @@ -1055,7 +1047,7 @@ static int m8xx_set_mem_map(struct pcmcia_socket *sock, unsigned int reg, winnr; pcmconf8xx_t *pcmcia = s->pcmcia; - dprintk("SetMemMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m8xx_pcmcia: SetMemMap(%d, %d, %#2.2x, %d ns, " "%#5.5llx, %#5.5x)\n", lsock, mem->map, mem->flags, mem->speed, (unsigned long long)mem->static_start, mem->card_start); @@ -1098,7 +1090,7 @@ static int m8xx_set_mem_map(struct pcmcia_socket *sock, out_be32(&w->or, reg); - dprintk("Socket %u: Mapped memory window %u at %#8.8x, " + pr_debug("m8xx_pcmcia: Socket %u: Mapped memory window %u at %#8.8x, " "OR = %#8.8x.\n", lsock, mem->map, w->br, w->or); if (mem->flags & MAP_ACTIVE) { @@ -1108,7 +1100,7 @@ static int m8xx_set_mem_map(struct pcmcia_socket *sock, + mem->card_start; } - dprintk("SetMemMap(%d, %d, %#2.2x, %d ns, " + pr_debug("m8xx_pcmcia: SetMemMap(%d, %d, %#2.2x, %d ns, " "%#5.5llx, %#5.5x)\n", lsock, mem->map, mem->flags, mem->speed, (unsigned long long)mem->static_start, mem->card_start); @@ -1129,7 +1121,7 @@ static int m8xx_sock_init(struct pcmcia_socket *sock) pccard_io_map io = { 0, 0, 0, 0, 1 }; pccard_mem_map mem = { 0, 0, 0, 0, 0, 0 }; - dprintk("sock_init(%d)\n", s); + pr_debug("m8xx_pcmcia: sock_init(%d)\n", s); m8xx_set_socket(sock, &dead_socket); for (i = 0; i < PCMCIA_IO_WIN_NO; i++) { diff --git a/drivers/pcmcia/tcic.c b/drivers/pcmcia/tcic.c index 6918849..12c49ee1 100644 --- a/drivers/pcmcia/tcic.c +++ b/drivers/pcmcia/tcic.c @@ -55,21 +55,6 @@ #include #include "tcic.h" -#ifdef CONFIG_PCMCIA_DEBUG -static int pc_debug; - -module_param(pc_debug, int, 0644); -static const char version[] = -"tcic.c 1.111 2000/02/15 04:13:12 (David Hinds)"; - -#define debug(lvl, fmt, arg...) do { \ - if (pc_debug > (lvl)) \ - printk(KERN_DEBUG "tcic: " fmt , ## arg); \ -} while (0) -#else -#define debug(lvl, fmt, arg...) do { } while (0) -#endif - MODULE_AUTHOR("David Hinds "); MODULE_DESCRIPTION("Databook TCIC-2 PCMCIA socket driver"); MODULE_LICENSE("Dual MPL/GPL"); @@ -574,7 +559,7 @@ static irqreturn_t tcic_interrupt(int irq, void *dev) } else active = 1; - debug(2, "tcic_interrupt()\n"); + pr_debug("tcic_interrupt()\n"); for (i = 0; i < sockets; i++) { psock = socket_table[i].psock; @@ -611,13 +596,13 @@ static irqreturn_t tcic_interrupt(int irq, void *dev) } active = 0; - debug(2, "interrupt done\n"); + pr_debug("interrupt done\n"); return IRQ_HANDLED; } /* tcic_interrupt */ static void tcic_timer(u_long data) { - debug(2, "tcic_timer()\n"); + pr_debug("tcic_timer()\n"); tcic_timer_pending = 0; tcic_interrupt(0, NULL); } /* tcic_timer */ @@ -644,7 +629,7 @@ static int tcic_get_status(struct pcmcia_socket *sock, u_int *value) reg = tcic_getb(TCIC_PWR); if (reg & (TCIC_PWR_VCC(psock)|TCIC_PWR_VPP(psock))) *value |= SS_POWERON; - debug(1, "GetStatus(%d) = %#2.2x\n", psock, *value); + dev_dbg(&sock->dev, "GetStatus(%d) = %#2.2x\n", psock, *value); return 0; } /* tcic_get_status */ @@ -656,7 +641,7 @@ static int tcic_set_socket(struct pcmcia_socket *sock, socket_state_t *state) u_char reg; u_short scf1, scf2; - debug(1, "SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " + dev_dbg(&sock->dev, "SetSocket(%d, flags %#3.3x, Vcc %d, Vpp %d, " "io_irq %d, csc_mask %#2.2x)\n", psock, state->flags, state->Vcc, state->Vpp, state->io_irq, state->csc_mask); tcic_setw(TCIC_ADDR+2, (psock << TCIC_SS_SHFT) | TCIC_ADR2_INDREG); @@ -731,7 +716,7 @@ static int tcic_set_io_map(struct pcmcia_socket *sock, struct pccard_io_map *io) u_int addr; u_short base, len, ioctl; - debug(1, "SetIOMap(%d, %d, %#2.2x, %d ns, " + dev_dbg(&sock->dev, "SetIOMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx)\n", psock, io->map, io->flags, io->speed, (unsigned long long)io->start, (unsigned long long)io->stop); if ((io->map > 1) || (io->start > 0xffff) || (io->stop > 0xffff) || @@ -768,7 +753,7 @@ static int tcic_set_mem_map(struct pcmcia_socket *sock, struct pccard_mem_map *m u_short addr, ctl; u_long base, len, mmap; - debug(1, "SetMemMap(%d, %d, %#2.2x, %d ns, " + dev_dbg(&sock->dev, "SetMemMap(%d, %d, %#2.2x, %d ns, " "%#llx-%#llx, %#x)\n", psock, mem->map, mem->flags, mem->speed, (unsigned long long)mem->res->start, (unsigned long long)mem->res->end, mem->card_start); -- cgit v0.10.2 From 6d9a299f675b176e2f81e1f6d5a361a1173971ea Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 12:20:18 +0200 Subject: pcmcia: extend error reporting and debug messages in core Add a few more error and debug messages to the PCMCIA core. Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index b0ec9c6..b229f6d 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -752,14 +752,17 @@ int pcmcia_reset_card(struct pcmcia_socket *skt) mutex_lock(&skt->skt_mutex); do { if (!(skt->state & SOCKET_PRESENT)) { + dev_dbg(&skt->dev, "can't reset, not present\n"); ret = -ENODEV; break; } if (skt->state & SOCKET_SUSPEND) { + dev_dbg(&skt->dev, "can't reset, suspended\n"); ret = -EBUSY; break; } if (skt->state & SOCKET_CARDBUS) { + dev_dbg(&skt->dev, "can't reset, is cardbus\n"); ret = -EPERM; break; } diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 349bc56..cda48ea 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -159,8 +159,10 @@ int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, s = p_dev->socket; c = p_dev->function_config; - if (!(c->state & CONFIG_LOCKED)) + if (!(c->state & CONFIG_LOCKED)) { + dev_dbg(&s->dev, "Configuration isnt't locked\n"); return -EACCES; + } addr = (c->ConfigBase + reg->Offset) >> 1; @@ -174,6 +176,7 @@ int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, pcmcia_write_cis_mem(s, 1, addr, 1, &val); break; default: + dev_dbg(&s->dev, "Invalid conf register request\n"); return -EINVAL; break; } @@ -264,10 +267,14 @@ int pcmcia_modify_configuration(struct pcmcia_device *p_dev, s = p_dev->socket; c = p_dev->function_config; - if (!(s->state & SOCKET_PRESENT)) + if (!(s->state & SOCKET_PRESENT)) { + dev_dbg(&s->dev, "No card present\n"); return -ENODEV; - if (!(c->state & CONFIG_LOCKED)) + } + if (!(c->state & CONFIG_LOCKED)) { + dev_dbg(&s->dev, "Configuration isnt't locked\n"); return -EACCES; + } if (mod->Attributes & CONF_IRQ_CHANGE_VALID) { if (mod->Attributes & CONF_ENABLE_IRQ) { @@ -442,8 +449,10 @@ int pcmcia_release_window(window_handle_t win) if ((win == NULL) || (win->magic != WINDOW_MAGIC)) return -EINVAL; s = win->sock; - if (!(win->handle->_win & CLIENT_WIN_REQ(win->index))) + if (!(win->handle->_win & CLIENT_WIN_REQ(win->index))) { + dev_dbg(&s->dev, "not releasing unknown window\n"); return -EINVAL; + } /* Shut down memory window */ win->ctl.flags &= ~MAP_ACTIVE; @@ -482,8 +491,10 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, return -EINVAL; } c = p_dev->function_config; - if (c->state & CONFIG_LOCKED) + if (c->state & CONFIG_LOCKED) { + dev_dbg(&s->dev, "Configuration is locked\n"); return -EACCES; + } /* Do power control. We don't allow changes in Vcc. */ s->socket.Vpp = req->Vpp; @@ -595,14 +606,18 @@ int pcmcia_request_io(struct pcmcia_device *p_dev, io_req_t *req) struct pcmcia_socket *s = p_dev->socket; config_t *c; - if (!(s->state & SOCKET_PRESENT)) + if (!(s->state & SOCKET_PRESENT)) { + dev_dbg(&s->dev, "No card present\n"); return -ENODEV; + } if (!req) return -EINVAL; c = p_dev->function_config; - if (c->state & CONFIG_LOCKED) + if (c->state & CONFIG_LOCKED) { + dev_dbg(&s->dev, "Configuration is locked\n"); return -EACCES; + } if (c->state & CONFIG_IO_REQ) { dev_dbg(&s->dev, "IO already configured\n"); return -EBUSY; @@ -666,11 +681,15 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) int ret = -EINVAL, irq = 0; int type; - if (!(s->state & SOCKET_PRESENT)) + if (!(s->state & SOCKET_PRESENT)) { + dev_dbg(&s->dev, "No card present\n"); return -ENODEV; + } c = p_dev->function_config; - if (c->state & CONFIG_LOCKED) + if (c->state & CONFIG_LOCKED) { + dev_dbg(&s->dev, "Configuration is locked\n"); return -EACCES; + } if (c->state & CONFIG_IRQ_REQ) { dev_dbg(&s->dev, "IRQ already configured\n"); return -EBUSY; @@ -731,8 +750,10 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) #endif /* only assign PCI irq if no IRQ already assigned */ if (ret && !s->irq.AssignedIRQ) { - if (!s->pci_irq) + if (!s->pci_irq) { + dev_printk(KERN_INFO, &s->dev, "no IRQ found\n"); return ret; + } type = IRQF_SHARED; irq = s->pci_irq; } @@ -740,8 +761,11 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) if (ret && (req->Attributes & IRQ_HANDLE_PRESENT)) { ret = request_irq(irq, req->Handler, type, p_dev->devname, req->Instance); - if (ret) + if (ret) { + dev_printk(KERN_INFO, &s->dev, + "request_irq() failed\n"); return ret; + } } /* Make sure the fact the request type was overridden is passed back */ @@ -780,8 +804,10 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h u_long align; int w; - if (!(s->state & SOCKET_PRESENT)) + if (!(s->state & SOCKET_PRESENT)) { + dev_dbg(&s->dev, "No card present\n"); return -ENODEV; + } if (req->Attributes & (WIN_PAGED | WIN_SHARED)) { dev_dbg(&s->dev, "bad attribute setting for iomem region\n"); return -EINVAL; @@ -1020,7 +1046,8 @@ static int pcmcia_do_get_tuple(struct pcmcia_device *p_dev, tuple_t *tuple, if (*get->buf) { get->len = tuple->TupleDataLen; memcpy(*get->buf, tuple->TupleData, tuple->TupleDataLen); - } + } else + dev_dbg(&p_dev->dev, "do_get_tuple: out of memory\n"); return 0; }; -- cgit v0.10.2 From 444486a5f9d2737b50e53dc140292899b9497808 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 23 Oct 2009 12:55:28 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (ide) ide-cs.c is the only PCMCIA device driver making use of CONFIG_PCMCIA_DEBUG, so convert it to use the dynamic debug infrastructure. Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: linux-ide@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index dc99e26..5f94e21 100644 --- a/drivers/ata/pata_pcmcia.c +++ b/drivers/ata/pata_pcmcia.c @@ -177,9 +177,6 @@ static struct ata_port_operations pcmcia_8bit_port_ops = { .drain_fifo = pcmcia_8bit_drain_fifo, }; -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - struct pcmcia_config_check { unsigned long ctl_base; @@ -252,7 +249,7 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) struct ata_port *ap; struct ata_pcmcia_info *info; struct pcmcia_config_check *stk = NULL; - int last_ret = 0, last_fn = 0, is_kme = 0, ret = -ENOMEM, p; + int is_kme = 0, ret = -ENOMEM, p; unsigned long io_base, ctl_base; void __iomem *io_addr, *ctl_addr; int n_ports = 1; @@ -296,8 +293,13 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) } io_base = pdev->io.BasePort1; ctl_base = stk->ctl_base; - CS_CHECK(RequestIRQ, pcmcia_request_irq(pdev, &pdev->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(pdev, &pdev->conf)); + ret = pcmcia_request_irq(pdev, &pdev->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(pdev, &pdev->conf); + if (ret) + goto failed; /* iomap */ ret = -ENOMEM; @@ -351,8 +353,6 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) kfree(stk); return 0; -cs_failed: - cs_error(pdev, last_fn, last_ret); failed: kfree(stk); info->ndev = 0; diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 063b933..6cee6c8 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -60,15 +60,6 @@ MODULE_AUTHOR("David Hinds "); MODULE_DESCRIPTION("PCMCIA ATA/IDE card driver"); MODULE_LICENSE("Dual MPL/GPL"); -#define INT_MODULE_PARM(n, v) static int n = v; module_param(n, int, 0) - -#ifdef CONFIG_PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -#else -#define DEBUG(n, args...) -#endif - /*====================================================================*/ typedef struct ide_info_t { @@ -98,7 +89,7 @@ static int ide_probe(struct pcmcia_device *link) { ide_info_t *info; - DEBUG(0, "ide_attach()\n"); + dev_dbg(&link->dev, "ide_attach()\n"); /* Create new ide device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -134,7 +125,7 @@ static void ide_detach(struct pcmcia_device *link) ide_hwif_t *hwif = info->host->ports[0]; unsigned long data_addr, ctl_addr; - DEBUG(0, "ide_detach(0x%p)\n", link); + dev_dbg(&link->dev, "ide_detach(0x%p)\n", link); data_addr = hwif->io_ports.data_addr; ctl_addr = hwif->io_ports.ctl_addr; @@ -217,9 +208,6 @@ out_release: ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - struct pcmcia_config_check { unsigned long ctl_base; int skip_vcc; @@ -282,11 +270,11 @@ static int ide_config(struct pcmcia_device *link) { ide_info_t *info = link->priv; struct pcmcia_config_check *stk = NULL; - int last_ret = 0, last_fn = 0, is_kme = 0; + int ret = 0, is_kme = 0; unsigned long io_base, ctl_base; struct ide_host *host; - DEBUG(0, "ide_config(0x%p)\n", link); + dev_dbg(&link->dev, "ide_config(0x%p)\n", link); is_kme = ((link->manf_id == MANFID_KME) && ((link->card_id == PRODID_KME_KXLC005_A) || @@ -306,8 +294,12 @@ static int ide_config(struct pcmcia_device *link) io_base = link->io.BasePort1; ctl_base = stk->ctl_base; - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* disable drive interrupts during IDE probe */ outb(0x02, ctl_base); @@ -342,8 +334,6 @@ err_mem: printk(KERN_NOTICE "ide-cs: ide_config failed memory allocation\n"); goto failed; -cs_failed: - cs_error(link, last_fn, last_ret); failed: kfree(stk); ide_release(link); @@ -363,7 +353,7 @@ static void ide_release(struct pcmcia_device *link) ide_info_t *info = link->priv; struct ide_host *host = info->host; - DEBUG(0, "ide_release(0x%p)\n", link); + dev_dbg(&link->dev, "ide_release(0x%p)\n", link); if (info->ndev) /* FIXME: if this fails we need to queue the cleanup somehow -- cgit v0.10.2 From 9ac3e58ceff0b7b8b981c09c38a28742270eea12 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:45:06 +0200 Subject: pcmcia: deprecate CS_CHECK (bluetooth) Remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: linux-bluetooth@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index b0e569b..1e0c4d8 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -905,22 +905,16 @@ static int bluecard_config(struct pcmcia_device *link) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) goto failed; - } i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) goto failed; - } if (bluecard_open(info) != 0) goto failed; diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index d58e22b..9787fda 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -740,21 +740,16 @@ static int bt3c_config(struct pcmcia_device *link) goto found_port; BT_ERR("No usable port range found"); - cs_error(link, RequestIO, -ENODEV); goto failed; found_port: i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) goto failed; - } if (bt3c_open(info) != 0) goto failed; diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index efd689a..f44d752 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c @@ -669,21 +669,16 @@ static int btuart_config(struct pcmcia_device *link) goto found_port; BT_ERR("No usable port range found"); - cs_error(link, RequestIO, -ENODEV); goto failed; found_port: i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) goto failed; - } if (btuart_open(info) != 0) goto failed; diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c index b881a9c..7cd8614 100644 --- a/drivers/bluetooth/dtl1_cs.c +++ b/drivers/bluetooth/dtl1_cs.c @@ -622,16 +622,12 @@ static int dtl1_config(struct pcmcia_device *link) goto failed; i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) goto failed; - } if (dtl1_open(info) != 0) goto failed; -- cgit v0.10.2 From f4a70c55376683213229af7266dc57ad81aee354 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 8 Nov 2009 16:16:45 +0300 Subject: x86, apic: Get rid of apicid_to_cpu_present assign on 64-bit In fact it's never get used on x86-64 (for 64 bit platform we use differ technique to enumerate io-units). Reported-by: Stephen Rothwell Signed-off-by: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <20091108131645.GD5300@lenovo> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 9ab6ffb..89629f6 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -162,7 +162,12 @@ struct apic apic_noop = { .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, + +#ifdef CONFIG_X86_32 .apicid_to_cpu_present = default_apicid_to_cpu_present, +#else + .apicid_to_cpu_present = NULL, +#endif .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, -- cgit v0.10.2 From 0e1a6ef2dea88101b056b6d9984f3325c5efced3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 8 Nov 2009 09:37:00 -0800 Subject: sysctl: require CAP_SYS_RAWIO to set mmap_min_addr Currently the mmap_min_addr value can only be bypassed during mmap when the task has CAP_SYS_RAWIO. However, the mmap_min_addr sysctl value itself can be adjusted to 0 if euid == 0, allowing a bypass without CAP_SYS_RAWIO. This patch adds a check for the capability before allowing mmap_min_addr to be changed. Signed-off-by: Kees Cook Acked-by: Serge Hallyn Signed-off-by: James Morris diff --git a/security/min_addr.c b/security/min_addr.c index c844eed..fc43c9d 100644 --- a/security/min_addr.c +++ b/security/min_addr.c @@ -33,6 +33,9 @@ int mmap_min_addr_handler(struct ctl_table *table, int write, { int ret; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); update_mmap_min_addr(); -- cgit v0.10.2 From 6e65f92ff0d6f18580737321718d09035085a3fb Mon Sep 17 00:00:00 2001 From: John Johansen Date: Thu, 5 Nov 2009 17:03:20 -0800 Subject: Config option to set a default LSM The LSM currently requires setting a kernel parameter at boot to select a specific LSM. This adds a config option that allows specifying a default LSM that is used unless overridden with the security= kernel parameter. If the the config option is not set the current behavior of first LSM to register is used. Signed-off-by: John Johansen Acked-by: Serge Hallyn Signed-off-by: James Morris diff --git a/security/Kconfig b/security/Kconfig index aeea8c2..95cc089 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -152,5 +152,37 @@ source security/tomoyo/Kconfig source security/integrity/ima/Kconfig +choice + prompt "Default security module" + default DEFAULT_SECURITY_SELINUX if SECURITY_SELINUX + default DEFAULT_SECURITY_SMACK if SECURITY_SMACK + default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO + default DEFAULT_SECURITY_DAC + + help + Select the security module that will be used by default if the + kernel parameter security= is not specified. + + config DEFAULT_SECURITY_SELINUX + bool "SELinux" if SECURITY_SELINUX=y + + config DEFAULT_SECURITY_SMACK + bool "Simplified Mandatory Access Control" if SECURITY_SMACK=y + + config DEFAULT_SECURITY_TOMOYO + bool "TOMOYO" if SECURITY_TOMOYO=y + + config DEFAULT_SECURITY_DAC + bool "Unix Discretionary Access Controls" + +endchoice + +config DEFAULT_SECURITY + string + default "selinux" if DEFAULT_SECURITY_SELINUX + default "smack" if DEFAULT_SECURITY_SMACK + default "tomoyo" if DEFAULT_SECURITY_TOMOYO + default "" if DEFAULT_SECURITY_DAC + endmenu diff --git a/security/security.c b/security/security.c index 684d5ee..aad71b2 100644 --- a/security/security.c +++ b/security/security.c @@ -19,7 +19,8 @@ #include /* Boot-time LSM user choice */ -static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1]; +static __initdata char chosen_lsm[SECURITY_NAME_MAX + 1] = + CONFIG_DEFAULT_SECURITY; /* things that live in capability.c */ extern struct security_operations default_security_ops; @@ -80,8 +81,10 @@ __setup("security=", choose_lsm); * * Return true if: * -The passed LSM is the one chosen by user at boot time, - * -or user didn't specify a specific LSM and we're the first to ask - * for registration permission, + * -or the passed LSM is configured as the default and the user did not + * choose an alternate LSM at boot time, + * -or there is no default LSM set and the user didn't specify a + * specific LSM and we're the first to ask for registration permission, * -or the passed LSM is currently loaded. * Otherwise, return false. */ -- cgit v0.10.2 From 5ff0cfc67f00fe0feaa1da0b2359232ea4aa0ee7 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Mon, 9 Nov 2009 12:31:05 +0900 Subject: perf bench: Fix bench/sched-pipe.c to wait for child process Ingo reported this small 'perf bench sched pipe' output problem: | $ ./perf bench sched pipe | (executing 1000000 pipe operations between two tasks) | | Total time:4.898 sec | $ 4.898586 usecs/op | 204140 ops/sec | | the shell prompt came back before the usecs/op and ops/sec line | was printed. Process teardown race, lack of wait() or so? This caused by lack of calling waitpid() by parent process, so I added it. Signed-off-by: Hitoshi Mitake Cc: Rusty Russell Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Jiri Kosina LKML-Reference: <1257737465-7546-1-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 3214ed2..6a29100 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -26,6 +26,7 @@ #include #include #include +#include #define LOOPS_DEFAULT 1000000 static int loops = LOOPS_DEFAULT; @@ -58,8 +59,8 @@ int bench_sched_pipe(int argc, const char **argv, * discarding returned value of read(), write() * causes error in building environment for perf */ - int ret; - pid_t pid; + int ret, wait_stat; + pid_t pid, retpid; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); @@ -87,8 +88,11 @@ int bench_sched_pipe(int argc, const char **argv, gettimeofday(&stop, NULL); timersub(&stop, &start, &diff); - if (pid) + if (pid) { + retpid = waitpid(pid, &wait_stat, 0); + assert((retpid == pid) && WIFEXITED(wait_stat)); return 0; + } if (simple) printf("%lu.%03lu\n", -- cgit v0.10.2 From cbf624f0e18c4a05219855663a3e5f9fe8f2d876 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:47:29 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (char) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Only some rare extra debug checks in cm4000_cs.c cm4040_cs.c are now hidden behind a "#ifdef CM4000_DEBUG" or "#ifdef CM4040_DEBUG". Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: Harald Welte CC: Jiri Kosina CC: David Sterba Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index c250a31..1611c4f 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -23,8 +23,6 @@ * All rights reserved. Licensed under dual BSD/GPL license. */ -/* #define PCMCIA_DEBUG 6 */ - #include #include #include @@ -47,18 +45,17 @@ /* #define ATR_CSUM */ -#ifdef PCMCIA_DEBUG #define reader_to_dev(x) (&handle_to_dev(x->p_dev)) -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0600); -#define DEBUGP(n, rdr, x, args...) do { \ - if (pc_debug >= (n)) \ - dev_printk(KERN_DEBUG, reader_to_dev(rdr), "%s:" x, \ - __func__ , ## args); \ + +/* n (debug level) is ignored */ +/* additional debug output may be enabled by re-compiling with + * CM4000_DEBUG set */ +/* #define CM4000_DEBUG */ +#define DEBUGP(n, rdr, x, args...) do { \ + dev_dbg(reader_to_dev(rdr), "%s:" x, \ + __func__ , ## args); \ } while (0) -#else -#define DEBUGP(n, rdr, x, args...) -#endif + static char *version = "cm4000_cs.c v2.4.0gm6 - All bugs added by Harald Welte"; #define T_1SEC (HZ) @@ -174,14 +171,13 @@ static unsigned char fi_di_table[10][14] = { /* 9 */ {0x09,0x19,0x29,0x39,0x49,0x59,0x69,0x11,0x11,0x99,0xA9,0xB9,0xC9,0xD9} }; -#ifndef PCMCIA_DEBUG +#ifndef CM4000_DEBUG #define xoutb outb #define xinb inb #else static inline void xoutb(unsigned char val, unsigned short port) { - if (pc_debug >= 7) - printk(KERN_DEBUG "outb(val=%.2x,port=%.4x)\n", val, port); + pr_debug("outb(val=%.2x,port=%.4x)\n", val, port); outb(val, port); } static inline unsigned char xinb(unsigned short port) @@ -189,8 +185,7 @@ static inline unsigned char xinb(unsigned short port) unsigned char val; val = inb(port); - if (pc_debug >= 7) - printk(KERN_DEBUG "%.2x=inb(%.4x)\n", val, port); + pr_debug("%.2x=inb(%.4x)\n", val, port); return val; } @@ -514,12 +509,10 @@ static int set_protocol(struct cm4000_dev *dev, struct ptsreq *ptsreq) for (i = 0; i < 4; i++) { xoutb(i, REG_BUF_ADDR(iobase)); xoutb(dev->pts[i], REG_BUF_DATA(iobase)); /* buf data */ -#ifdef PCMCIA_DEBUG - if (pc_debug >= 5) - printk("0x%.2x ", dev->pts[i]); +#ifdef CM4000_DEBUG + pr_debug("0x%.2x ", dev->pts[i]); } - if (pc_debug >= 5) - printk("\n"); + pr_debug("\n"); #else } #endif @@ -579,14 +572,13 @@ static int set_protocol(struct cm4000_dev *dev, struct ptsreq *ptsreq) pts_reply[i] = inb(REG_BUF_DATA(iobase)); } -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG DEBUGP(2, dev, "PTSreply: "); for (i = 0; i < num_bytes_read; i++) { - if (pc_debug >= 5) - printk("0x%.2x ", pts_reply[i]); + pr_debug("0x%.2x ", pts_reply[i]); } - printk("\n"); -#endif /* PCMCIA_DEBUG */ + pr_debug("\n"); +#endif /* CM4000_DEBUG */ DEBUGP(5, dev, "Clear Tactive in Flags1\n"); xoutb(0x20, REG_FLAGS1(iobase)); @@ -655,7 +647,7 @@ static void terminate_monitor(struct cm4000_dev *dev) DEBUGP(5, dev, "Delete timer\n"); del_timer_sync(&dev->timer); -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG dev->monitor_running = 0; #endif @@ -898,7 +890,7 @@ static void monitor_card(unsigned long p) DEBUGP(4, dev, "ATR checksum (0x%.2x, should " "be zero) failed\n", dev->atr_csum); } -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG else if (test_bit(IS_BAD_LENGTH, &dev->flags)) { DEBUGP(4, dev, "ATR length error\n"); } else { @@ -1415,7 +1407,7 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) int size; int rc; void __user *argp = (void __user *)arg; -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG char *ioctl_names[CM_IOC_MAXNR + 1] = { [_IOC_NR(CM_IOCGSTATUS)] "CM_IOCGSTATUS", [_IOC_NR(CM_IOCGATR)] "CM_IOCGATR", @@ -1423,9 +1415,9 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) [_IOC_NR(CM_IOCSPTS)] "CM_IOCSPTS", [_IOC_NR(CM_IOSDBGLVL)] "CM4000_DBGLVL", }; -#endif DEBUGP(3, dev, "cmm_ioctl(device=%d.%d) %s\n", imajor(inode), iminor(inode), ioctl_names[_IOC_NR(cmd)]); +#endif lock_kernel(); rc = -ENODEV; @@ -1523,7 +1515,7 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } case CM_IOCARDOFF: -#ifdef PCMCIA_DEBUG +#ifdef CM4000_DEBUG DEBUGP(4, dev, "... in CM_IOCARDOFF\n"); if (dev->flags0 & 0x01) { DEBUGP(4, dev, " Card inserted\n"); @@ -1625,18 +1617,9 @@ static long cmm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } break; -#ifdef PCMCIA_DEBUG - case CM_IOSDBGLVL: /* set debug log level */ - { - int old_pc_debug = 0; - - old_pc_debug = pc_debug; - if (copy_from_user(&pc_debug, argp, sizeof(int))) - rc = -EFAULT; - else if (old_pc_debug != pc_debug) - DEBUGP(0, dev, "Changed debug log level " - "to %i\n", pc_debug); - } +#ifdef CM4000_DEBUG + case CM_IOSDBGLVL: + rc = -ENOTTY; break; #endif default: diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index 4f0723b..0001ad2 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -17,8 +17,6 @@ * All rights reserved, Dual BSD/GPL Licensed. */ -/* #define PCMCIA_DEBUG 6 */ - #include #include #include @@ -41,18 +39,16 @@ #include "cm4040_cs.h" -#ifdef PCMCIA_DEBUG #define reader_to_dev(x) (&handle_to_dev(x->p_dev)) -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0600); -#define DEBUGP(n, rdr, x, args...) do { \ - if (pc_debug >= (n)) \ - dev_printk(KERN_DEBUG, reader_to_dev(rdr), "%s:" x, \ - __func__ , ##args); \ + +/* n (debug level) is ignored */ +/* additional debug output may be enabled by re-compiling with + * CM4040_DEBUG set */ +/* #define CM4040_DEBUG */ +#define DEBUGP(n, rdr, x, args...) do { \ + dev_dbg(reader_to_dev(rdr), "%s:" x, \ + __func__ , ## args); \ } while (0) -#else -#define DEBUGP(n, rdr, x, args...) -#endif static char *version = "OMNIKEY CardMan 4040 v1.1.0gm5 - All bugs added by Harald Welte"; @@ -90,14 +86,13 @@ struct reader_dev { static struct pcmcia_device *dev_table[CM_MAX_DEV]; -#ifndef PCMCIA_DEBUG +#ifndef CM4040_DEBUG #define xoutb outb #define xinb inb #else static inline void xoutb(unsigned char val, unsigned short port) { - if (pc_debug >= 7) - printk(KERN_DEBUG "outb(val=%.2x,port=%.4x)\n", val, port); + pr_debug("outb(val=%.2x,port=%.4x)\n", val, port); outb(val, port); } @@ -106,8 +101,7 @@ static inline unsigned char xinb(unsigned short port) unsigned char val; val = inb(port); - if (pc_debug >= 7) - printk(KERN_DEBUG "%.2x=inb(%.4x)\n", val, port); + pr_debug("%.2x=inb(%.4x)\n", val, port); return val; } #endif @@ -260,11 +254,10 @@ static ssize_t cm4040_read(struct file *filp, char __user *buf, return -EIO; } dev->r_buf[i] = xinb(iobase + REG_OFFSET_BULK_IN); -#ifdef PCMCIA_DEBUG - if (pc_debug >= 6) - printk(KERN_DEBUG "%lu:%2x ", i, dev->r_buf[i]); +#ifdef CM4040_DEBUG + pr_debug("%lu:%2x ", i, dev->r_buf[i]); } - printk("\n"); + pr_debug("\n"); #else } #endif @@ -288,11 +281,10 @@ static ssize_t cm4040_read(struct file *filp, char __user *buf, return -EIO; } dev->r_buf[i+5] = xinb(iobase + REG_OFFSET_BULK_IN); -#ifdef PCMCIA_DEBUG - if (pc_debug >= 6) - printk(KERN_DEBUG "%lu:%2x ", i, dev->r_buf[i]); +#ifdef CM4040_DEBUG + pr_debug("%lu:%2x ", i, dev->r_buf[i]); } - printk("\n"); + pr_debug("\n"); #else } #endif diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 0f46749..24bffa4 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -65,10 +65,7 @@ static void signalled_reboot_work(struct work_struct *work_reboot) struct ipw_dev *ipw = container_of(work_reboot, struct ipw_dev, work_reboot); struct pcmcia_device *link = ipw->link; - int ret = pcmcia_reset_card(link->socket); - - if (ret != 0) - cs_error(link, ResetCard, ret); + pcmcia_reset_card(link->socket); } static void signalled_reboot_callback(void *callback_data) @@ -122,10 +119,8 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, ret = pcmcia_request_window(&p_dev, &ipw->request_common_memory, &ipw->handle_common_memory); - if (ret != 0) { - cs_error(p_dev, RequestWindow, ret); + if (ret != 0) goto exit1; - } memreq_common_memory.CardOffset = cfg->mem.win[0].card_addr; memreq_common_memory.Page = 0; @@ -133,10 +128,8 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, ret = pcmcia_map_mem_page(ipw->handle_common_memory, &memreq_common_memory); - if (ret != 0) { - cs_error(p_dev, MapMemPage, ret); + if (ret != 0) goto exit2; - } ipw->is_v2_card = cfg->mem.win[0].len == 0x100; @@ -155,10 +148,8 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, ret = pcmcia_request_window(&p_dev, &ipw->request_attr_memory, &ipw->handle_attr_memory); - if (ret != 0) { - cs_error(p_dev, RequestWindow, ret); + if (ret != 0) goto exit2; - } memreq_attr_memory.CardOffset = 0; memreq_attr_memory.Page = 0; @@ -166,10 +157,8 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, ret = pcmcia_map_mem_page(ipw->handle_attr_memory, &memreq_attr_memory); - if (ret != 0) { - cs_error(p_dev, MapMemPage, ret); + if (ret != 0) goto exit3; - } ipw->attr_memory = ioremap(ipw->request_attr_memory.Base, ipw->request_attr_memory.Size); @@ -202,10 +191,8 @@ static int config_ipwireless(struct ipw_dev *ipw) ipw->is_v2_card = 0; ret = pcmcia_loop_config(link, ipwireless_probe, ipw); - if (ret != 0) { - cs_error(link, RequestIO, ret); + if (ret != 0) return ret; - } link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -223,10 +210,8 @@ static int config_ipwireless(struct ipw_dev *ipw) ret = pcmcia_request_irq(link, &link->irq); - if (ret != 0) { - cs_error(link, RequestIRQ, ret); + if (ret != 0) goto exit; - } printk(KERN_INFO IPWIRELESS_PCCARD_NAME ": Card type %s\n", ipw->is_v2_card ? "V2/V3" : "V1"); @@ -263,10 +248,8 @@ static int config_ipwireless(struct ipw_dev *ipw) */ ret = pcmcia_request_configuration(link, &link->conf); - if (ret != 0) { - cs_error(link, RequestConfiguration, ret); + if (ret != 0) goto exit; - } link->dev_node = &ipw->nodes[0]; @@ -347,7 +330,6 @@ static int ipwireless_attach(struct pcmcia_device *link) ret = config_ipwireless(ipw); if (ret != 0) { - cs_error(link, RegisterClient, ret); ipwireless_detach(link); return ret; } diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 429b731..09b2590 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -572,9 +572,6 @@ static int mgslpc_probe(struct pcmcia_device *link) /* Card has been inserted. */ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int mgslpc_ioprobe(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -598,15 +595,14 @@ static int mgslpc_ioprobe(struct pcmcia_device *p_dev, static int mgslpc_config(struct pcmcia_device *link) { MGSLPC_INFO *info = link->priv; - int last_fn = RequestIO; - int last_ret; + int ret; if (debug_level >= DEBUG_LEVEL_INFO) printk("mgslpc_config(0x%p)\n", link); - last_ret = pcmcia_loop_config(link, mgslpc_ioprobe, NULL); - if (last_ret != 0) - goto cs_failed; + ret = pcmcia_loop_config(link, mgslpc_ioprobe, NULL); + if (ret != 0) + goto failed; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -616,9 +612,13 @@ static int mgslpc_config(struct pcmcia_device *link) link->irq.Attributes |= IRQ_HANDLE_PRESENT; link->irq.Handler = mgslpc_isr; link->irq.Instance = info; - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; info->io_base = link->io.BasePort1; info->irq_level = link->irq.AssignedIRQ; @@ -638,8 +638,7 @@ static int mgslpc_config(struct pcmcia_device *link) printk("\n"); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); +failed: mgslpc_release((u_long)link); return -ENODEV; } -- cgit v0.10.2 From e773cfe167c320d07b9423bc51fc4ab0221775a4 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:50:13 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (isdn) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: Karsten Keil Signed-off-by: Dominik Brodowski diff --git a/drivers/isdn/hardware/avm/avm_cs.c b/drivers/isdn/hardware/avm/avm_cs.c index c725655..d388ead 100644 --- a/drivers/isdn/hardware/avm/avm_cs.c +++ b/drivers/isdn/hardware/avm/avm_cs.c @@ -198,7 +198,6 @@ static int avmcs_config(struct pcmcia_device *link) */ i = pcmcia_request_irq(link, &link->irq); if (i != 0) { - cs_error(link, RequestIRQ, i); /* undo */ pcmcia_disable_device(link); break; @@ -209,7 +208,6 @@ static int avmcs_config(struct pcmcia_device *link) */ i = pcmcia_request_configuration(link, &link->conf); if (i != 0) { - cs_error(link, RequestConfiguration, i); pcmcia_disable_device(link); break; } diff --git a/drivers/isdn/hisax/avma1_cs.c b/drivers/isdn/hisax/avma1_cs.c index 23560c8..6d963f9 100644 --- a/drivers/isdn/hisax/avma1_cs.c +++ b/drivers/isdn/hisax/avma1_cs.c @@ -30,22 +30,6 @@ MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for AVM A1/Fritz!PCMCIA car MODULE_AUTHOR("Carsten Paeth"); MODULE_LICENSE("GPL"); -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -static char *version = -"avma1_cs.c 1.00 1998/01/23 10:00:00 (Carsten Paeth)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -119,7 +103,7 @@ static int avma1cs_probe(struct pcmcia_device *p_dev) { local_info_t *local; - DEBUG(0, "avma1cs_attach()\n"); + dev_dbg(&p_dev->dev, "avma1cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(local_info_t), GFP_KERNEL); @@ -161,7 +145,7 @@ static int avma1cs_probe(struct pcmcia_device *p_dev) static void avma1cs_detach(struct pcmcia_device *link) { - DEBUG(0, "avma1cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "avma1cs_detach(0x%p)\n", link); avma1cs_release(link); kfree(link->priv); } /* avma1cs_detach */ @@ -203,7 +187,7 @@ static int avma1cs_config(struct pcmcia_device *link) dev = link->priv; - DEBUG(0, "avma1cs_config(0x%p)\n", link); + dev_dbg(&link->dev, "avma1cs_config(0x%p)\n", link); devname[0] = 0; if (link->prod_id[1]) @@ -218,7 +202,6 @@ static int avma1cs_config(struct pcmcia_device *link) */ i = pcmcia_request_irq(link, &link->irq); if (i != 0) { - cs_error(link, RequestIRQ, i); /* undo */ pcmcia_disable_device(link); break; @@ -229,7 +212,6 @@ static int avma1cs_config(struct pcmcia_device *link) */ i = pcmcia_request_configuration(link, &link->conf); if (i != 0) { - cs_error(link, RequestConfiguration, i); pcmcia_disable_device(link); break; } @@ -281,7 +263,7 @@ static void avma1cs_release(struct pcmcia_device *link) { local_info_t *local = link->priv; - DEBUG(0, "avma1cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "avma1cs_release(0x%p)\n", link); /* now unregister function with hisax */ HiSax_closecard(local->node.minor); diff --git a/drivers/isdn/hisax/elsa_cs.c b/drivers/isdn/hisax/elsa_cs.c index f4d0fe2..cdcd297 100644 --- a/drivers/isdn/hisax/elsa_cs.c +++ b/drivers/isdn/hisax/elsa_cs.c @@ -57,23 +57,6 @@ MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for Elsa PCM cards"); MODULE_AUTHOR("Klaus Lichtenwalder"); MODULE_LICENSE("Dual MPL/GPL"); -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -static char *version = -"elsa_cs.c $Revision: 1.2.2.4 $ $Date: 2004/01/25 15:07:06 $ (K.Lichtenwalder)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -142,7 +125,7 @@ static int elsa_cs_probe(struct pcmcia_device *link) { local_info_t *local; - DEBUG(0, "elsa_cs_attach()\n"); + dev_dbg(&link->dev, "elsa_cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(local_info_t), GFP_KERNEL); @@ -188,7 +171,7 @@ static void elsa_cs_detach(struct pcmcia_device *link) { local_info_t *info = link->priv; - DEBUG(0, "elsa_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "elsa_cs_detach(0x%p)\n", link); info->busy = 1; elsa_cs_release(link); @@ -231,30 +214,25 @@ static int elsa_cs_configcheck(struct pcmcia_device *p_dev, static int elsa_cs_config(struct pcmcia_device *link) { local_info_t *dev; - int i, last_fn; + int i; IsdnCard_t icard; - DEBUG(0, "elsa_config(0x%p)\n", link); + dev_dbg(&link->dev, "elsa_config(0x%p)\n", link); dev = link->priv; i = pcmcia_loop_config(link, elsa_cs_configcheck, NULL); - if (i != 0) { - last_fn = RequestIO; - goto cs_failed; - } + if (i != 0) + goto failed; i = pcmcia_request_irq(link, &link->irq); if (i != 0) { link->irq.AssignedIRQ = 0; - last_fn = RequestIRQ; - goto cs_failed; + goto failed; } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - last_fn = RequestConfiguration; - goto cs_failed; - } + if (i != 0) + goto failed; /* At this point, the dev_node_t structure(s) should be initialized and arranged in a linked list at link->dev. *//* */ @@ -290,8 +268,7 @@ static int elsa_cs_config(struct pcmcia_device *link) ((local_info_t*)link->priv)->cardnr = i; return 0; -cs_failed: - cs_error(link, last_fn, i); +failed: elsa_cs_release(link); return -ENODEV; } /* elsa_cs_config */ @@ -308,7 +285,7 @@ static void elsa_cs_release(struct pcmcia_device *link) { local_info_t *local = link->priv; - DEBUG(0, "elsa_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "elsa_cs_release(0x%p)\n", link); if (local) { if (local->cardnr >= 0) { diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c index 9a3c9f5..33d7530 100644 --- a/drivers/isdn/hisax/sedlbauer_cs.c +++ b/drivers/isdn/hisax/sedlbauer_cs.c @@ -57,24 +57,6 @@ MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for Sedlbauer cards"); MODULE_AUTHOR("Marcus Niemann"); MODULE_LICENSE("Dual MPL/GPL"); -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -static char *version = -"sedlbauer_cs.c 1.1a 2001/01/28 15:04:04 (M.Niemann)"; -#else -#define DEBUG(n, args...) -#endif - /*====================================================================*/ @@ -151,7 +133,7 @@ static int sedlbauer_probe(struct pcmcia_device *link) { local_info_t *local; - DEBUG(0, "sedlbauer_attach()\n"); + dev_dbg(&link->dev, "sedlbauer_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(local_info_t), GFP_KERNEL); @@ -198,7 +180,7 @@ static int sedlbauer_probe(struct pcmcia_device *link) static void sedlbauer_detach(struct pcmcia_device *link) { - DEBUG(0, "sedlbauer_detach(0x%p)\n", link); + dev_dbg(&link->dev, "sedlbauer_detach(0x%p)\n", link); ((local_info_t *)link->priv)->stop = 1; sedlbauer_release(link); @@ -214,9 +196,6 @@ static void sedlbauer_detach(struct pcmcia_device *link) device available to the system. ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int sedlbauer_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -309,10 +288,10 @@ static int sedlbauer_config(struct pcmcia_device *link) { local_info_t *dev = link->priv; win_req_t *req; - int last_fn, last_ret; + int ret; IsdnCard_t icard; - DEBUG(0, "sedlbauer_config(0x%p)\n", link); + dev_dbg(&link->dev, "sedlbauer_config(0x%p)\n", link); req = kzalloc(sizeof(win_req_t), GFP_KERNEL); if (!req) @@ -330,8 +309,8 @@ static int sedlbauer_config(struct pcmcia_device *link) these things without consulting the CIS, and most client drivers will only use the CIS to fill in implementation-defined details. */ - last_ret = pcmcia_loop_config(link, sedlbauer_config_check, req); - if (last_ret) + ret = pcmcia_loop_config(link, sedlbauer_config_check, req); + if (ret) goto failed; /* @@ -339,15 +318,20 @@ static int sedlbauer_config(struct pcmcia_device *link) handler to the interrupt, unless the 'Handler' member of the irq structure is initialized. */ - if (link->conf.Attributes & CONF_ENABLE_IRQ) - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + if (link->conf.Attributes & CONF_ENABLE_IRQ) { + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + } /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -380,19 +364,18 @@ static int sedlbauer_config(struct pcmcia_device *link) icard.protocol = protocol; icard.typ = ISDN_CTYPE_SEDLBAUER_PCMCIA; - last_ret = hisax_init_pcmcia(link, &(((local_info_t*)link->priv)->stop), &icard); - if (last_ret < 0) { - printk(KERN_ERR "sedlbauer_cs: failed to initialize SEDLBAUER PCMCIA %d at i/o %#x\n", - last_ret, link->io.BasePort1); + ret = hisax_init_pcmcia(link, + &(((local_info_t *)link->priv)->stop), &icard); + if (ret < 0) { + printk(KERN_ERR "sedlbauer_cs: failed to initialize SEDLBAUER PCMCIA %d at i/o %#x\n", + ret, link->io.BasePort1); sedlbauer_release(link); return -ENODEV; } else - ((local_info_t*)link->priv)->cardnr = last_ret; + ((local_info_t *)link->priv)->cardnr = ret; return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: sedlbauer_release(link); return -ENODEV; @@ -410,7 +393,7 @@ failed: static void sedlbauer_release(struct pcmcia_device *link) { local_info_t *local = link->priv; - DEBUG(0, "sedlbauer_release(0x%p)\n", link); + dev_dbg(&link->dev, "sedlbauer_release(0x%p)\n", link); if (local) { if (local->cardnr >= 0) { diff --git a/drivers/isdn/hisax/teles_cs.c b/drivers/isdn/hisax/teles_cs.c index 623d111..7b11c15 100644 --- a/drivers/isdn/hisax/teles_cs.c +++ b/drivers/isdn/hisax/teles_cs.c @@ -38,23 +38,6 @@ MODULE_DESCRIPTION("ISDN4Linux: PCMCIA client driver for Teles PCMCIA cards"); MODULE_AUTHOR("Christof Petig, christof.petig@wtal.de, Karsten Keil, kkeil@suse.de"); MODULE_LICENSE("GPL"); -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -static char *version = -"teles_cs.c 2.10 2002/07/30 22:23:34 kkeil"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -133,7 +116,7 @@ static int teles_probe(struct pcmcia_device *link) { local_info_t *local; - DEBUG(0, "teles_attach()\n"); + dev_dbg(&link->dev, "teles_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(local_info_t), GFP_KERNEL); @@ -178,7 +161,7 @@ static void teles_detach(struct pcmcia_device *link) { local_info_t *info = link->priv; - DEBUG(0, "teles_detach(0x%p)\n", link); + dev_dbg(&link->dev, "teles_detach(0x%p)\n", link); info->busy = 1; teles_cs_release(link); @@ -221,30 +204,25 @@ static int teles_cs_configcheck(struct pcmcia_device *p_dev, static int teles_cs_config(struct pcmcia_device *link) { local_info_t *dev; - int i, last_fn; + int i; IsdnCard_t icard; - DEBUG(0, "teles_config(0x%p)\n", link); + dev_dbg(&link->dev, "teles_config(0x%p)\n", link); dev = link->priv; i = pcmcia_loop_config(link, teles_cs_configcheck, NULL); - if (i != 0) { - last_fn = RequestIO; + if (i != 0) goto cs_failed; - } i = pcmcia_request_irq(link, &link->irq); if (i != 0) { link->irq.AssignedIRQ = 0; - last_fn = RequestIRQ; goto cs_failed; } i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - last_fn = RequestConfiguration; + if (i != 0) goto cs_failed; - } /* At this point, the dev_node_t structure(s) should be initialized and arranged in a linked list at link->dev. *//* */ @@ -283,7 +261,6 @@ static int teles_cs_config(struct pcmcia_device *link) return 0; cs_failed: - cs_error(link, last_fn, i); teles_cs_release(link); return -ENODEV; } /* teles_cs_config */ @@ -300,7 +277,7 @@ static void teles_cs_release(struct pcmcia_device *link) { local_info_t *local = link->priv; - DEBUG(0, "teles_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "teles_cs_release(0x%p)\n", link); if (local) { if (local->cardnr >= 0) { -- cgit v0.10.2 From dd0fab5b940c0b65f26ac5b01485bac1f690ace6 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:51:05 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (net) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Only some rare debug checks are now hidden behind "#ifdef DEBUG" or "#if 0". Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index 6449290..dbef5d9 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -118,14 +118,6 @@ INT_MODULE_PARM(full_duplex, 0); /* Autodetect link polarity reversal? */ INT_MODULE_PARM(auto_polarity, 1); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"3c574_cs.c 1.65ac1 2003/04/07 Donald Becker/David Hinds, becker@scyld.com.\n"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -278,7 +270,7 @@ static int tc574_probe(struct pcmcia_device *link) struct el3_private *lp; struct net_device *dev; - DEBUG(0, "3c574_attach()\n"); + dev_dbg(&link->dev, "3c574_attach()\n"); /* Create the PC card device object. */ dev = alloc_etherdev(sizeof(struct el3_private)); @@ -319,7 +311,7 @@ static void tc574_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "3c574_detach(0x%p)\n", link); + dev_dbg(&link->dev, "3c574_detach()\n"); if (link->dev_node) unregister_netdev(dev); @@ -335,16 +327,13 @@ static void tc574_detach(struct pcmcia_device *link) ethernet device available to the system. */ -#define CS_CHECK(fn, ret) \ - do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static const char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; static int tc574_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct el3_private *lp = netdev_priv(dev); - int last_fn, last_ret, i, j; + int ret, i, j; unsigned int ioaddr; __be16 *phys_addr; char *cardname; @@ -354,7 +343,7 @@ static int tc574_config(struct pcmcia_device *link) phys_addr = (__be16 *)dev->dev_addr; - DEBUG(0, "3c574_config(0x%p)\n", link); + dev_dbg(&link->dev, "3c574_config()\n"); link->io.IOAddrLines = 16; for (i = j = 0; j < 0x400; j += 0x20) { @@ -363,12 +352,16 @@ static int tc574_config(struct pcmcia_device *link) if (i == 0) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) + goto failed; + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -433,7 +426,8 @@ static int tc574_config(struct pcmcia_device *link) mii_status = mdio_read(ioaddr, phy & 0x1f, 1); if (mii_status != 0xffff) { lp->phys = phy & 0x1f; - DEBUG(0, " MII transceiver at index %d, status %x.\n", + dev_dbg(&link->dev, " MII transceiver at " + "index %d, status %x.\n", phy, mii_status); if ((mii_status & 0x0040) == 0) mii_preamble_required = 1; @@ -476,8 +470,6 @@ static int tc574_config(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: tc574_release(link); return -ENODEV; @@ -736,7 +728,7 @@ static int el3_open(struct net_device *dev) lp->media.expires = jiffies + HZ; add_timer(&lp->media); - DEBUG(2, "%s: opened, status %4.4x.\n", + dev_dbg(&link->dev, "%s: opened, status %4.4x.\n", dev->name, inw(dev->base_addr + EL3_STATUS)); return 0; @@ -770,7 +762,7 @@ static void pop_tx_status(struct net_device *dev) if (tx_status & 0x30) tc574_wait_for_completion(dev, TxReset); if (tx_status & 0x38) { - DEBUG(1, "%s: transmit error: status 0x%02x\n", + pr_debug("%s: transmit error: status 0x%02x\n", dev->name, tx_status); outw(TxEnable, ioaddr + EL3_CMD); dev->stats.tx_aborted_errors++; @@ -786,7 +778,7 @@ static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct el3_private *lp = netdev_priv(dev); unsigned long flags; - DEBUG(3, "%s: el3_start_xmit(length = %ld) called, " + pr_debug("%s: el3_start_xmit(length = %ld) called, " "status %4.4x.\n", dev->name, (long)skb->len, inw(ioaddr + EL3_STATUS)); @@ -825,7 +817,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) return IRQ_NONE; ioaddr = dev->base_addr; - DEBUG(3, "%s: interrupt, status %4.4x.\n", + pr_debug("%s: interrupt, status %4.4x.\n", dev->name, inw(ioaddr + EL3_STATUS)); spin_lock(&lp->window_lock); @@ -834,7 +826,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) (IntLatch | RxComplete | RxEarly | StatsFull)) { if (!netif_device_present(dev) || ((status & 0xe000) != 0x2000)) { - DEBUG(1, "%s: Interrupt from dead card\n", dev->name); + pr_debug("%s: Interrupt from dead card\n", dev->name); break; } @@ -844,7 +836,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) work_budget = el3_rx(dev, work_budget); if (status & TxAvailable) { - DEBUG(3, " TX room bit was handled.\n"); + pr_debug(" TX room bit was handled.\n"); /* There's room in the FIFO for a full-sized packet. */ outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); netif_wake_queue(dev); @@ -884,7 +876,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) } if (--work_budget < 0) { - DEBUG(0, "%s: Too much work in interrupt, " + pr_debug("%s: Too much work in interrupt, " "status %4.4x.\n", dev->name, status); /* Clear all interrupts */ outw(AckIntr | 0xFF, ioaddr + EL3_CMD); @@ -894,7 +886,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) outw(AckIntr | IntReq | IntLatch, ioaddr + EL3_CMD); } - DEBUG(3, "%s: exiting interrupt, status %4.4x.\n", + pr_debug("%s: exiting interrupt, status %4.4x.\n", dev->name, inw(ioaddr + EL3_STATUS)); spin_unlock(&lp->window_lock); @@ -1001,7 +993,7 @@ static void update_stats(struct net_device *dev) unsigned int ioaddr = dev->base_addr; u8 rx, tx, up; - DEBUG(2, "%s: updating the statistics.\n", dev->name); + pr_debug("%s: updating the statistics.\n", dev->name); if (inw(ioaddr+EL3_STATUS) == 0xffff) /* No card. */ return; @@ -1037,7 +1029,7 @@ static int el3_rx(struct net_device *dev, int worklimit) unsigned int ioaddr = dev->base_addr; short rx_status; - DEBUG(3, "%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", + pr_debug("%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", dev->name, inw(ioaddr+EL3_STATUS), inw(ioaddr+RxStatus)); while (!((rx_status = inw(ioaddr + RxStatus)) & 0x8000) && worklimit > 0) { @@ -1059,7 +1051,7 @@ static int el3_rx(struct net_device *dev, int worklimit) skb = dev_alloc_skb(pkt_len+5); - DEBUG(3, " Receiving packet size %d status %4.4x.\n", + pr_debug(" Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { skb_reserve(skb, 2); @@ -1070,7 +1062,7 @@ static int el3_rx(struct net_device *dev, int worklimit) dev->stats.rx_packets++; dev->stats.rx_bytes += pkt_len; } else { - DEBUG(1, "%s: couldn't allocate a sk_buff of" + pr_debug("%s: couldn't allocate a sk_buff of" " size %d.\n", dev->name, pkt_len); dev->stats.rx_dropped++; } @@ -1099,7 +1091,7 @@ static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) struct mii_ioctl_data *data = if_mii(rq); int phy = lp->phys & 0x1f; - DEBUG(2, "%s: In ioct(%-.6s, %#4.4x) %4.4x %4.4x %4.4x %4.4x.\n", + pr_debug("%s: In ioct(%-.6s, %#4.4x) %4.4x %4.4x %4.4x %4.4x.\n", dev->name, rq->ifr_ifrn.ifrn_name, cmd, data->phy_id, data->reg_num, data->val_in, data->val_out); @@ -1176,7 +1168,7 @@ static int el3_close(struct net_device *dev) struct el3_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(2, "%s: shutting down ethercard.\n", dev->name); + dev_dbg(&link->dev, "%s: shutting down ethercard.\n", dev->name); if (pcmcia_dev_present(link)) { unsigned long flags; diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index fc6123e..6eac62b 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -130,14 +130,6 @@ MODULE_LICENSE("GPL"); /* Special hook for setting if_port when module is loaded */ INT_MODULE_PARM(if_port, 0); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -DRV_NAME ".c " DRV_VERSION " 2001/10/13 00:08:50 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -189,7 +181,7 @@ static int tc589_probe(struct pcmcia_device *link) struct el3_private *lp; struct net_device *dev; - DEBUG(0, "3c589_attach()\n"); + dev_dbg(&link->dev, "3c589_attach()\n"); /* Create new ethernet device */ dev = alloc_etherdev(sizeof(struct el3_private)); @@ -231,7 +223,7 @@ static void tc589_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "3c589_detach(0x%p)\n", link); + dev_dbg(&link->dev, "3c589_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -249,21 +241,18 @@ static void tc589_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int tc589_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; struct el3_private *lp = netdev_priv(dev); __be16 *phys_addr; - int last_fn, last_ret, i, j, multi = 0, fifo; + int ret, i, j, multi = 0, fifo; unsigned int ioaddr; char *ram_split[] = {"5:3", "3:1", "1:1", "3:5"}; u8 *buf; size_t len; - DEBUG(0, "3c589_config(0x%p)\n", link); + dev_dbg(&link->dev, "3c589_config\n"); phys_addr = (__be16 *)dev->dev_addr; /* Is this a 3c562? */ @@ -281,12 +270,16 @@ static int tc589_config(struct pcmcia_device *link) if (i == 0) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -342,8 +335,6 @@ static int tc589_config(struct pcmcia_device *link) if_names[dev->if_port]); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: tc589_release(link); return -ENODEV; @@ -506,24 +497,8 @@ static void netdev_get_drvinfo(struct net_device *dev, sprintf(info->bus_info, "PCMCIA 0x%lx", dev->base_addr); } -#ifdef PCMCIA_DEBUG -static u32 netdev_get_msglevel(struct net_device *dev) -{ - return pc_debug; -} - -static void netdev_set_msglevel(struct net_device *dev, u32 level) -{ - pc_debug = level; -} -#endif /* PCMCIA_DEBUG */ - static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, -#ifdef PCMCIA_DEBUG - .get_msglevel = netdev_get_msglevel, - .set_msglevel = netdev_set_msglevel, -#endif /* PCMCIA_DEBUG */ }; static int el3_config(struct net_device *dev, struct ifmap *map) @@ -558,7 +533,7 @@ static int el3_open(struct net_device *dev) lp->media.expires = jiffies + HZ; add_timer(&lp->media); - DEBUG(1, "%s: opened, status %4.4x.\n", + dev_dbg(&link->dev, "%s: opened, status %4.4x.\n", dev->name, inw(dev->base_addr + EL3_STATUS)); return 0; @@ -591,7 +566,7 @@ static void pop_tx_status(struct net_device *dev) if (tx_status & 0x30) tc589_wait_for_completion(dev, TxReset); if (tx_status & 0x38) { - DEBUG(1, "%s: transmit error: status 0x%02x\n", + pr_debug("%s: transmit error: status 0x%02x\n", dev->name, tx_status); outw(TxEnable, ioaddr + EL3_CMD); dev->stats.tx_aborted_errors++; @@ -607,7 +582,7 @@ static netdev_tx_t el3_start_xmit(struct sk_buff *skb, struct el3_private *priv = netdev_priv(dev); unsigned long flags; - DEBUG(3, "%s: el3_start_xmit(length = %ld) called, " + pr_debug("%s: el3_start_xmit(length = %ld) called, " "status %4.4x.\n", dev->name, (long)skb->len, inw(ioaddr + EL3_STATUS)); @@ -649,14 +624,14 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) ioaddr = dev->base_addr; - DEBUG(3, "%s: interrupt, status %4.4x.\n", + pr_debug("%s: interrupt, status %4.4x.\n", dev->name, inw(ioaddr + EL3_STATUS)); spin_lock(&lp->lock); while ((status = inw(ioaddr + EL3_STATUS)) & (IntLatch | RxComplete | StatsFull)) { if ((status & 0xe000) != 0x2000) { - DEBUG(1, "%s: interrupt from dead card\n", dev->name); + pr_debug("%s: interrupt from dead card\n", dev->name); handled = 0; break; } @@ -665,7 +640,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) el3_rx(dev); if (status & TxAvailable) { - DEBUG(3, " TX room bit was handled.\n"); + pr_debug(" TX room bit was handled.\n"); /* There's room in the FIFO for a full-sized packet. */ outw(AckIntr | TxAvailable, ioaddr + EL3_CMD); netif_wake_queue(dev); @@ -717,7 +692,7 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id) lp->last_irq = jiffies; spin_unlock(&lp->lock); - DEBUG(3, "%s: exiting interrupt, status %4.4x.\n", + pr_debug("%s: exiting interrupt, status %4.4x.\n", dev->name, inw(ioaddr + EL3_STATUS)); return IRQ_RETVAL(handled); } @@ -828,7 +803,7 @@ static void update_stats(struct net_device *dev) { unsigned int ioaddr = dev->base_addr; - DEBUG(2, "%s: updating the statistics.\n", dev->name); + pr_debug("%s: updating the statistics.\n", dev->name); /* Turn off statistics updates while reading. */ outw(StatsDisable, ioaddr + EL3_CMD); /* Switch to the stats window, and read everything. */ @@ -856,7 +831,7 @@ static int el3_rx(struct net_device *dev) int worklimit = 32; short rx_status; - DEBUG(3, "%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", + pr_debug("%s: in rx_packet(), status %4.4x, rx_status %4.4x.\n", dev->name, inw(ioaddr+EL3_STATUS), inw(ioaddr+RX_STATUS)); while (!((rx_status = inw(ioaddr + RX_STATUS)) & 0x8000) && worklimit > 0) { @@ -878,7 +853,7 @@ static int el3_rx(struct net_device *dev) skb = dev_alloc_skb(pkt_len+5); - DEBUG(3, " Receiving packet size %d status %4.4x.\n", + pr_debug(" Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); if (skb != NULL) { skb_reserve(skb, 2); @@ -889,7 +864,7 @@ static int el3_rx(struct net_device *dev) dev->stats.rx_packets++; dev->stats.rx_bytes += pkt_len; } else { - DEBUG(1, "%s: couldn't allocate a sk_buff of" + pr_debug("%s: couldn't allocate a sk_buff of" " size %d.\n", dev->name, pkt_len); dev->stats.rx_dropped++; } @@ -930,7 +905,7 @@ static int el3_close(struct net_device *dev) struct pcmcia_device *link = lp->p_dev; unsigned int ioaddr = dev->base_addr; - DEBUG(1, "%s: shutting down ethercard.\n", dev->name); + dev_dbg(&link->dev, "%s: shutting down ethercard.\n", dev->name); if (pcmcia_dev_present(link)) { /* Turn off statistics ASAP. We update dev->stats below. */ diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 3131a59..5af2ccf 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -75,16 +75,6 @@ MODULE_AUTHOR("David Hinds "); MODULE_DESCRIPTION("Asix AX88190 PCMCIA ethernet driver"); MODULE_LICENSE("GPL"); -#ifdef PCMCIA_DEBUG -#define INT_MODULE_PARM(n, v) static int n = v; module_param(n, int, 0) - -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"axnet_cs.c 1.28 2002/06/29 06:27:37 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -167,7 +157,7 @@ static int axnet_probe(struct pcmcia_device *link) struct net_device *dev; struct ei_device *ei_local; - DEBUG(0, "axnet_attach()\n"); + dev_dbg(&link->dev, "axnet_attach()\n"); dev = alloc_etherdev(sizeof(struct ei_device) + sizeof(axnet_dev_t)); if (!dev) @@ -205,7 +195,7 @@ static void axnet_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "axnet_detach(0x%p)\n", link); + dev_dbg(&link->dev, "axnet_detach(0x%p)\n", link); if (link->dev_node) unregister_netdev(dev); @@ -272,9 +262,6 @@ static int get_prom(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int try_io_port(struct pcmcia_device *link) { int j, ret; @@ -341,26 +328,29 @@ static int axnet_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; axnet_dev_t *info = PRIV(dev); - int i, j, j2, last_ret, last_fn; + int i, j, j2, ret; - DEBUG(0, "axnet_config(0x%p)\n", link); + dev_dbg(&link->dev, "axnet_config(0x%p)\n", link); /* don't trust the CIS on this; Linksys got it wrong */ link->conf.Present = 0x63; - last_ret = pcmcia_loop_config(link, axnet_configcheck, NULL); - if (last_ret != 0) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, axnet_configcheck, NULL); + if (ret != 0) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; if (link->io.NumPorts2 == 8) { link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; } - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; + dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -426,14 +416,12 @@ static int axnet_config(struct pcmcia_device *link) dev->base_addr, dev->irq, dev->dev_addr); if (info->phy_id != -1) { - DEBUG(0, " MII transceiver at index %d, status %x.\n", info->phy_id, j); + dev_dbg(&link->dev, " MII transceiver at index %d, status %x.\n", info->phy_id, j); } else { printk(KERN_NOTICE " No MII transceivers found!\n"); } return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: axnet_release(link); return -ENODEV; @@ -543,7 +531,7 @@ static int axnet_open(struct net_device *dev) struct pcmcia_device *link = info->p_dev; unsigned int nic_base = dev->base_addr; - DEBUG(2, "axnet_open('%s')\n", dev->name); + dev_dbg(&link->dev, "axnet_open('%s')\n", dev->name); if (!pcmcia_dev_present(link)) return -ENODEV; @@ -572,7 +560,7 @@ static int axnet_close(struct net_device *dev) axnet_dev_t *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; - DEBUG(2, "axnet_close('%s')\n", dev->name); + dev_dbg(&link->dev, "axnet_close('%s')\n", dev->name); ax_close(dev); free_irq(dev->irq, dev); @@ -741,10 +729,8 @@ static void block_input(struct net_device *dev, int count, int xfer_count = count; char *buf = skb->data; -#ifdef PCMCIA_DEBUG if ((ei_debug > 4) && (count != 4)) - printk(KERN_DEBUG "%s: [bi=%d]\n", dev->name, count+4); -#endif + pr_debug("%s: [bi=%d]\n", dev->name, count+4); outb_p(ring_offset & 0xff, nic_base + EN0_RSARLO); outb_p(ring_offset >> 8, nic_base + EN0_RSARHI); outb_p(E8390_RREAD+E8390_START, nic_base + AXNET_CMD); @@ -762,10 +748,7 @@ static void block_output(struct net_device *dev, int count, { unsigned int nic_base = dev->base_addr; -#ifdef PCMCIA_DEBUG - if (ei_debug > 4) - printk(KERN_DEBUG "%s: [bo=%d]\n", dev->name, count); -#endif + pr_debug("%s: [bo=%d]\n", dev->name, count); /* Round the count up for word writes. Do we need to do this? What effect will an odd byte count have on the 8390? diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c index 7b5c77b7..9a2e500 100644 --- a/drivers/net/pcmcia/com20020_cs.c +++ b/drivers/net/pcmcia/com20020_cs.c @@ -53,11 +53,7 @@ #define VERSION "arcnet: COM20020 PCMCIA support loaded.\n" -#ifdef PCMCIA_DEBUG - -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) +#ifdef DEBUG static void regdump(struct net_device *dev) { @@ -92,7 +88,6 @@ static void regdump(struct net_device *dev) #else -#define DEBUG(n, args...) do { } while (0) static inline void regdump(struct net_device *dev) { } #endif @@ -144,7 +139,7 @@ static int com20020_probe(struct pcmcia_device *p_dev) struct net_device *dev; struct arcnet_local *lp; - DEBUG(0, "com20020_attach()\n"); + dev_dbg(&p_dev->dev, "com20020_attach()\n"); /* Create new network device */ info = kzalloc(sizeof(struct com20020_dev_t), GFP_KERNEL); @@ -198,12 +193,12 @@ static void com20020_detach(struct pcmcia_device *link) struct com20020_dev_t *info = link->priv; struct net_device *dev = info->dev; - DEBUG(1,"detach...\n"); + dev_dbg(&link->dev, "detach...\n"); - DEBUG(0, "com20020_detach(0x%p)\n", link); + dev_dbg(&link->dev, "com20020_detach\n"); if (link->dev_node) { - DEBUG(1,"unregister...\n"); + dev_dbg(&link->dev, "unregister...\n"); unregister_netdev(dev); @@ -218,16 +213,16 @@ static void com20020_detach(struct pcmcia_device *link) com20020_release(link); /* Unlink device structure, free bits */ - DEBUG(1,"unlinking...\n"); + dev_dbg(&link->dev, "unlinking...\n"); if (link->priv) { dev = info->dev; if (dev) { - DEBUG(1,"kfree...\n"); + dev_dbg(&link->dev, "kfree...\n"); free_netdev(dev); } - DEBUG(1,"kfree2...\n"); + dev_dbg(&link->dev, "kfree2...\n"); kfree(info); } @@ -241,25 +236,22 @@ static void com20020_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int com20020_config(struct pcmcia_device *link) { struct arcnet_local *lp; com20020_dev_t *info; struct net_device *dev; - int i, last_ret, last_fn; + int i, ret; int ioaddr; info = link->priv; dev = info->dev; - DEBUG(1,"config...\n"); + dev_dbg(&link->dev, "config...\n"); - DEBUG(0, "com20020_config(0x%p)\n", link); + dev_dbg(&link->dev, "com20020_config\n"); - DEBUG(1,"arcnet: baseport1 is %Xh\n", link->io.BasePort1); + dev_dbg(&link->dev, "baseport1 is %Xh\n", link->io.BasePort1); i = -ENODEV; if (!link->io.BasePort1) { @@ -276,26 +268,28 @@ static int com20020_config(struct pcmcia_device *link) if (i != 0) { - DEBUG(1,"arcnet: requestIO failed totally!\n"); + dev_dbg(&link->dev, "requestIO failed totally!\n"); goto failed; } ioaddr = dev->base_addr = link->io.BasePort1; - DEBUG(1,"arcnet: got ioaddr %Xh\n", ioaddr); + dev_dbg(&link->dev, "got ioaddr %Xh\n", ioaddr); - DEBUG(1,"arcnet: request IRQ %d (%Xh/%Xh)\n", + dev_dbg(&link->dev, "request IRQ %d (%Xh/%Xh)\n", link->irq.AssignedIRQ, link->irq.IRQInfo1, link->irq.IRQInfo2); i = pcmcia_request_irq(link, &link->irq); if (i != 0) { - DEBUG(1,"arcnet: requestIRQ failed totally!\n"); + dev_dbg(&link->dev, "requestIRQ failed totally!\n"); goto failed; } dev->irq = link->irq.AssignedIRQ; - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; if (com20020_check(dev)) { @@ -313,21 +307,20 @@ static int com20020_config(struct pcmcia_device *link) i = com20020_found(dev, 0); /* calls register_netdev */ if (i != 0) { - DEBUG(1,KERN_NOTICE "com20020_cs: com20020_found() failed\n"); + dev_printk(KERN_NOTICE, &link->dev, + "com20020_cs: com20020_found() failed\n"); link->dev_node = NULL; goto failed; } strcpy(info->node.dev_name, dev->name); - DEBUG(1,KERN_INFO "%s: port %#3lx, irq %d\n", + dev_dbg(&link->dev,KERN_INFO "%s: port %#3lx, irq %d\n", dev->name, dev->base_addr, dev->irq); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: - DEBUG(1,"com20020_config failed...\n"); + dev_dbg(&link->dev, "com20020_config failed...\n"); com20020_release(link); return -ENODEV; } /* com20020_config */ @@ -342,7 +335,7 @@ failed: static void com20020_release(struct pcmcia_device *link) { - DEBUG(0, "com20020_release(0x%p)\n", link); + dev_dbg(&link->dev, "com20020_release\n"); pcmcia_disable_device(link); } diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 5b73b11..fdec5c3 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -72,13 +72,6 @@ MODULE_LICENSE("GPL"); /* 0:4KB*2 TX buffer else:8KB*2 TX buffer */ INT_MODULE_PARM(sram_config, 0); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = DRV_NAME ".c " DRV_VERSION " 2002/03/23"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ /* @@ -245,7 +238,7 @@ static int fmvj18x_probe(struct pcmcia_device *link) local_info_t *lp; struct net_device *dev; - DEBUG(0, "fmvj18x_attach()\n"); + dev_dbg(&link->dev, "fmvj18x_attach()\n"); /* Make up a FMVJ18x specific data structure */ dev = alloc_etherdev(sizeof(local_info_t)); @@ -285,7 +278,7 @@ static void fmvj18x_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "fmvj18x_detach(0x%p)\n", link); + dev_dbg(&link->dev, "fmvj18x_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -297,9 +290,6 @@ static void fmvj18x_detach(struct pcmcia_device *link) /*====================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int mfc_try_io_port(struct pcmcia_device *link) { int i, ret; @@ -354,7 +344,7 @@ static int fmvj18x_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; local_info_t *lp = netdev_priv(dev); - int i, last_fn = RequestIO, last_ret = 0, ret; + int i, ret; unsigned int ioaddr; cardtype_t cardtype; char *card_name = "unknown"; @@ -362,16 +352,16 @@ static int fmvj18x_config(struct pcmcia_device *link) size_t len; u_char buggybuf[32]; - DEBUG(0, "fmvj18x_config(0x%p)\n", link); + dev_dbg(&link->dev, "fmvj18x_config\n"); len = pcmcia_get_tuple(link, CISTPL_FUNCE, &buf); kfree(buf); if (len) { /* Yes, I have CISTPL_FUNCE. Let's check CISTPL_MANFID */ - last_ret = pcmcia_loop_config(link, fmvj18x_ioprobe, NULL); - if (last_ret != 0) - goto cs_failed; + ret = pcmcia_loop_config(link, fmvj18x_ioprobe, NULL); + if (ret != 0) + goto failed; switch (link->manf_id) { case MANFID_TDK: @@ -440,15 +430,22 @@ static int fmvj18x_config(struct pcmcia_device *link) link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; ret = mfc_try_io_port(link); - if (ret != 0) goto cs_failed; + if (ret != 0) goto failed; } else if (cardtype == UNGERMANN) { ret = ungermann_try_io_port(link); - if (ret != 0) goto cs_failed; + if (ret != 0) goto failed; } else { - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); + ret = pcmcia_request_io(link, &link->io); + if (ret) + goto failed; } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; + dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -553,9 +550,6 @@ static int fmvj18x_config(struct pcmcia_device *link) return 0; -cs_failed: - /* All Card Services errors end up here */ - cs_error(link, last_fn, last_ret); failed: fmvj18x_release(link); return -ENODEV; @@ -574,10 +568,8 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) req.Base = 0; req.Size = 0; req.AccessSpeed = 0; i = pcmcia_request_window(&link, &req, &link->win); - if (i != 0) { - cs_error(link, RequestWindow, i); + if (i != 0) return -1; - } base = ioremap(req.Base, req.Size); mem.Page = 0; @@ -608,8 +600,6 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) iounmap(base); j = pcmcia_release_window(link->win); - if (j != 0) - cs_error(link, ReleaseWindow, j); return (i != 0x200) ? 0 : -1; } /* fmvj18x_get_hwinfo */ @@ -629,10 +619,8 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) req.Base = 0; req.Size = 0; req.AccessSpeed = 0; i = pcmcia_request_window(&link, &req, &link->win); - if (i != 0) { - cs_error(link, RequestWindow, i); + if (i != 0) return -1; - } lp->base = ioremap(req.Base, req.Size); if (lp->base == NULL) { @@ -646,7 +634,6 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) if (i != 0) { iounmap(lp->base); lp->base = NULL; - cs_error(link, MapMemPage, i); return -1; } @@ -673,15 +660,13 @@ static void fmvj18x_release(struct pcmcia_device *link) u_char __iomem *tmp; int j; - DEBUG(0, "fmvj18x_release(0x%p)\n", link); + dev_dbg(&link->dev, "fmvj18x_release\n"); if (lp->base != NULL) { tmp = lp->base; lp->base = NULL; /* set NULL before iounmap */ iounmap(tmp); j = pcmcia_release_window(link->win); - if (j != 0) - cs_error(link, ReleaseWindow, j); } pcmcia_disable_device(link); @@ -790,8 +775,8 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) outb(tx_stat, ioaddr + TX_STATUS); outb(rx_stat, ioaddr + RX_STATUS); - DEBUG(4, "%s: interrupt, rx_status %02x.\n", dev->name, rx_stat); - DEBUG(4, " tx_status %02x.\n", tx_stat); + pr_debug("%s: interrupt, rx_status %02x.\n", dev->name, rx_stat); + pr_debug(" tx_status %02x.\n", tx_stat); if (rx_stat || (inb(ioaddr + RX_MODE) & F_BUF_EMP) == 0) { /* there is packet(s) in rx buffer */ @@ -811,8 +796,8 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id) } netif_wake_queue(dev); } - DEBUG(4, "%s: exiting interrupt,\n", dev->name); - DEBUG(4, " tx_status %02x, rx_status %02x.\n", tx_stat, rx_stat); + pr_debug("%s: exiting interrupt,\n", dev->name); + pr_debug(" tx_status %02x, rx_status %02x.\n", tx_stat, rx_stat); outb(D_TX_INTR, ioaddr + TX_INTR); outb(D_RX_INTR, ioaddr + RX_INTR); @@ -884,7 +869,7 @@ static netdev_tx_t fjn_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - DEBUG(4, "%s: Transmitting a packet of length %lu.\n", + pr_debug("%s: Transmitting a packet of length %lu.\n", dev->name, (unsigned long)skb->len); dev->stats.tx_bytes += skb->len; @@ -939,7 +924,7 @@ static void fjn_reset(struct net_device *dev) unsigned int ioaddr = dev->base_addr; int i; - DEBUG(4, "fjn_reset(%s) called.\n",dev->name); + pr_debug("fjn_reset(%s) called.\n",dev->name); /* Reset controller */ if( sram_config == 0 ) @@ -1017,13 +1002,13 @@ static void fjn_rx(struct net_device *dev) unsigned int ioaddr = dev->base_addr; int boguscount = 10; /* 5 -> 10: by agy 19940922 */ - DEBUG(4, "%s: in rx_packet(), rx_status %02x.\n", + pr_debug("%s: in rx_packet(), rx_status %02x.\n", dev->name, inb(ioaddr + RX_STATUS)); while ((inb(ioaddr + RX_MODE) & F_BUF_EMP) == 0) { u_short status = inw(ioaddr + DATAPORT); - DEBUG(4, "%s: Rxing packet mode %02x status %04x.\n", + pr_debug("%s: Rxing packet mode %02x status %04x.\n", dev->name, inb(ioaddr + RX_MODE), status); #ifndef final_version if (status == 0) { @@ -1063,16 +1048,14 @@ static void fjn_rx(struct net_device *dev) (pkt_len + 1) >> 1); skb->protocol = eth_type_trans(skb, dev); -#ifdef PCMCIA_DEBUG - if (pc_debug > 5) { + { int i; - printk(KERN_DEBUG "%s: Rxed packet of length %d: ", - dev->name, pkt_len); + pr_debug("%s: Rxed packet of length %d: ", + dev->name, pkt_len); for (i = 0; i < 14; i++) - printk(" %02x", skb->data[i]); - printk(".\n"); + pr_debug(" %02x", skb->data[i]); + pr_debug(".\n"); } -#endif netif_rx(skb); dev->stats.rx_packets++; @@ -1096,7 +1079,7 @@ static void fjn_rx(struct net_device *dev) } if (i > 0) - DEBUG(5, "%s: Exint Rx packet with mode %02x after " + pr_debug("%s: Exint Rx packet with mode %02x after " "%d ticks.\n", dev->name, inb(ioaddr + RX_MODE), i); } */ @@ -1114,24 +1097,8 @@ static void netdev_get_drvinfo(struct net_device *dev, sprintf(info->bus_info, "PCMCIA 0x%lx", dev->base_addr); } -#ifdef PCMCIA_DEBUG -static u32 netdev_get_msglevel(struct net_device *dev) -{ - return pc_debug; -} - -static void netdev_set_msglevel(struct net_device *dev, u32 level) -{ - pc_debug = level; -} -#endif /* PCMCIA_DEBUG */ - static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, -#ifdef PCMCIA_DEBUG - .get_msglevel = netdev_get_msglevel, - .set_msglevel = netdev_set_msglevel, -#endif /* PCMCIA_DEBUG */ }; static int fjn_config(struct net_device *dev, struct ifmap *map){ @@ -1143,7 +1110,7 @@ static int fjn_open(struct net_device *dev) struct local_info_t *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(4, "fjn_open('%s').\n", dev->name); + pr_debug("fjn_open('%s').\n", dev->name); if (!pcmcia_dev_present(link)) return -ENODEV; @@ -1169,7 +1136,7 @@ static int fjn_close(struct net_device *dev) struct pcmcia_device *link = lp->p_dev; unsigned int ioaddr = dev->base_addr; - DEBUG(4, "fjn_close('%s').\n", dev->name); + pr_debug("fjn_close('%s').\n", dev->name); lp->open_time = 0; netif_stop_queue(dev); diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 06618af..0d914f3 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -69,17 +69,6 @@ #define PCMCIA #include "../tokenring/ibmtr.c" -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"ibmtr_cs.c 1.10 1996/01/06 05:19:00 (Steve Kipisz)\n" -" 2.2.7 1999/05/03 12:00:00 (Mike Phillips)\n" -" 2.4.2 2001/30/28 Midnight (Burt Silverman)\n"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -143,7 +132,7 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link) ibmtr_dev_t *info; struct net_device *dev; - DEBUG(0, "ibmtr_attach()\n"); + dev_dbg(&link->dev, "ibmtr_attach()\n"); /* Create new token-ring device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -190,7 +179,7 @@ static void ibmtr_detach(struct pcmcia_device *link) struct net_device *dev = info->dev; struct tok_info *ti = netdev_priv(dev); - DEBUG(0, "ibmtr_detach(0x%p)\n", link); + dev_dbg(&link->dev, "ibmtr_detach\n"); /* * When the card removal interrupt hits tok_interrupt(), @@ -217,9 +206,6 @@ static void ibmtr_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int __devinit ibmtr_config(struct pcmcia_device *link) { ibmtr_dev_t *info = link->priv; @@ -227,9 +213,9 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) struct tok_info *ti = netdev_priv(dev); win_req_t req; memreq_t mem; - int i, last_ret, last_fn; + int i, ret; - DEBUG(0, "ibmtr_config(0x%p)\n", link); + dev_dbg(&link->dev, "ibmtr_config\n"); link->conf.ConfigIndex = 0x61; @@ -241,11 +227,15 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) if (i != 0) { /* Couldn't get 0xA20-0xA23. Try ALTERNATE at 0xA24-0xA27. */ link->io.BasePort1 = 0xA24; - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); + ret = pcmcia_request_io(link, &link->io); + if (ret) + goto failed; } dev->base_addr = link->io.BasePort1; - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; ti->irq = link->irq.AssignedIRQ; ti->global_int_enable=GLOBAL_INT_ENABLE+((dev->irq==9) ? 2 : dev->irq); @@ -256,11 +246,15 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x2000; req.AccessSpeed = 250; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &link->win)); + ret = pcmcia_request_window(&link, &req, &link->win); + if (ret) + goto failed; mem.CardOffset = mmiobase; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(link->win, &mem)); + ret = pcmcia_map_mem_page(link->win, &mem); + if (ret) + goto failed; ti->mmio = ioremap(req.Base, req.Size); /* Allocate the SRAM memory window */ @@ -269,17 +263,23 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) req.Base = 0; req.Size = sramsize * 1024; req.AccessSpeed = 250; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &info->sram_win_handle)); + ret = pcmcia_request_window(&link, &req, &info->sram_win_handle); + if (ret) + goto failed; mem.CardOffset = srambase; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(info->sram_win_handle, &mem)); + ret = pcmcia_map_mem_page(info->sram_win_handle, &mem); + if (ret) + goto failed; ti->sram_base = mem.CardOffset >> 12; ti->sram_virt = ioremap(req.Base, req.Size); ti->sram_phys = req.Base; - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* Set up the Token-Ring Controller Configuration Register and turn on the card. Check the "Local Area Network Credit Card @@ -305,8 +305,6 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) dev->dev_addr); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: ibmtr_release(link); return -ENODEV; @@ -325,7 +323,7 @@ static void ibmtr_release(struct pcmcia_device *link) ibmtr_dev_t *info = link->priv; struct net_device *dev = info->dev; - DEBUG(0, "ibmtr_release(0x%p)\n", link); + dev_dbg(&link->dev, "ibmtr_release\n"); if (link->win) { struct tok_info *ti = netdev_priv(dev); diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index 4b96b35..a536348 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -381,13 +381,6 @@ typedef struct _mace_private { Private Global Variables ---------------------------------------------------------------------------- */ -#ifdef PCMCIA_DEBUG -static char rcsid[] = -"nmclan_cs.c,v 0.16 1995/07/01 06:42:17 rpao Exp rpao"; -static char *version = -DRV_NAME " " DRV_VERSION " (Roger C. Pao)"; -#endif - static const char *if_names[]={ "Auto", "10baseT", "BNC", }; @@ -406,12 +399,6 @@ MODULE_LICENSE("GPL"); /* 0=auto, 1=10baseT, 2 = 10base2, default=auto */ INT_MODULE_PARM(if_port, 0); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -#else -#define DEBUG(n, args...) -#endif /* ---------------------------------------------------------------------------- Function Prototypes @@ -462,8 +449,7 @@ static int nmclan_probe(struct pcmcia_device *link) mace_private *lp; struct net_device *dev; - DEBUG(0, "nmclan_attach()\n"); - DEBUG(1, "%s\n", rcsid); + dev_dbg(&link->dev, "nmclan_attach()\n"); /* Create new ethernet device */ dev = alloc_etherdev(sizeof(mace_private)); @@ -507,7 +493,7 @@ static void nmclan_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "nmclan_detach(0x%p)\n", link); + dev_dbg(&link->dev, "nmclan_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -654,23 +640,27 @@ nmclan_config ethernet device available to the system. ---------------------------------------------------------------------------- */ -#define CS_CHECK(fn, ret) \ - do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int nmclan_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; mace_private *lp = netdev_priv(dev); u8 *buf; size_t len; - int i, last_ret, last_fn; + int i, ret; unsigned int ioaddr; - DEBUG(0, "nmclan_config(0x%p)\n", link); + dev_dbg(&link->dev, "nmclan_config\n"); + + ret = pcmcia_request_io(link, &link->io); + if (ret) + goto failed; + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -692,7 +682,7 @@ static int nmclan_config(struct pcmcia_device *link) sig[0] = mace_read(lp, ioaddr, MACE_CHIPIDL); sig[1] = mace_read(lp, ioaddr, MACE_CHIPIDH); if ((sig[0] == 0x40) && ((sig[1] & 0x0F) == 0x09)) { - DEBUG(0, "nmclan_cs configured: mace id=%x %x\n", + dev_dbg(&link->dev, "nmclan_cs configured: mace id=%x %x\n", sig[0], sig[1]); } else { printk(KERN_NOTICE "nmclan_cs: mace id not found: %x %x should" @@ -728,8 +718,6 @@ static int nmclan_config(struct pcmcia_device *link) dev->dev_addr); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: nmclan_release(link); return -ENODEV; @@ -743,7 +731,7 @@ nmclan_release ---------------------------------------------------------------------------- */ static void nmclan_release(struct pcmcia_device *link) { - DEBUG(0, "nmclan_release(0x%p)\n", link); + dev_dbg(&link->dev, "nmclan_release\n"); pcmcia_disable_device(link); } @@ -794,7 +782,7 @@ static void nmclan_reset(struct net_device *dev) /* Reset Xilinx */ reg.Action = CS_WRITE; reg.Offset = CISREG_COR; - DEBUG(1, "nmclan_reset: OrigCorValue=0x%lX, resetting...\n", + dev_dbg(&link->dev, "nmclan_reset: OrigCorValue=0x%lX, resetting...\n", OrigCorValue); reg.Value = COR_SOFT_RESET; pcmcia_access_configuration_register(link, ®); @@ -871,7 +859,7 @@ static int mace_close(struct net_device *dev) mace_private *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(2, "%s: shutting down ethercard.\n", dev->name); + dev_dbg(&link->dev, "%s: shutting down ethercard.\n", dev->name); /* Mask off all interrupts from the MACE chip. */ outb(0xFF, ioaddr + AM2150_MACE_BASE + MACE_IMR); @@ -890,24 +878,8 @@ static void netdev_get_drvinfo(struct net_device *dev, sprintf(info->bus_info, "PCMCIA 0x%lx", dev->base_addr); } -#ifdef PCMCIA_DEBUG -static u32 netdev_get_msglevel(struct net_device *dev) -{ - return pc_debug; -} - -static void netdev_set_msglevel(struct net_device *dev, u32 level) -{ - pc_debug = level; -} -#endif /* PCMCIA_DEBUG */ - static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, -#ifdef PCMCIA_DEBUG - .get_msglevel = netdev_get_msglevel, - .set_msglevel = netdev_set_msglevel, -#endif /* PCMCIA_DEBUG */ }; /* ---------------------------------------------------------------------------- @@ -945,7 +917,7 @@ static netdev_tx_t mace_start_xmit(struct sk_buff *skb, netif_stop_queue(dev); - DEBUG(3, "%s: mace_start_xmit(length = %ld) called.\n", + pr_debug("%s: mace_start_xmit(length = %ld) called.\n", dev->name, (long)skb->len); #if (!TX_INTERRUPTABLE) @@ -1007,7 +979,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) int IntrCnt = MACE_MAX_IR_ITERATIONS; if (dev == NULL) { - DEBUG(2, "mace_interrupt(): irq 0x%X for unknown device.\n", + pr_debug("mace_interrupt(): irq 0x%X for unknown device.\n", irq); return IRQ_NONE; } @@ -1030,7 +1002,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) } if (!netif_device_present(dev)) { - DEBUG(2, "%s: interrupt from dead card\n", dev->name); + pr_debug("%s: interrupt from dead card\n", dev->name); return IRQ_NONE; } @@ -1038,7 +1010,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id) /* WARNING: MACE_IR is a READ/CLEAR port! */ status = inb(ioaddr + AM2150_MACE_BASE + MACE_IR); - DEBUG(3, "mace_interrupt: irq 0x%X status 0x%X.\n", irq, status); + pr_debug("mace_interrupt: irq 0x%X status 0x%X.\n", irq, status); if (status & MACE_IR_RCVINT) { mace_rx(dev, MACE_MAX_RX_ITERATIONS); @@ -1157,7 +1129,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt) ) { rx_status = inw(ioaddr + AM2150_RCV); - DEBUG(3, "%s: in mace_rx(), framecnt 0x%X, rx_status" + pr_debug("%s: in mace_rx(), framecnt 0x%X, rx_status" " 0x%X.\n", dev->name, rx_framecnt, rx_status); if (rx_status & MACE_RCVFS_RCVSTS) { /* Error, update stats. */ @@ -1184,7 +1156,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt) lp->mace_stats.rfs_rcvcc += inb(ioaddr + AM2150_RCV); /* rcv collision count */ - DEBUG(3, " receiving packet size 0x%X rx_status" + pr_debug(" receiving packet size 0x%X rx_status" " 0x%X.\n", pkt_len, rx_status); skb = dev_alloc_skb(pkt_len+2); @@ -1203,7 +1175,7 @@ static int mace_rx(struct net_device *dev, unsigned char RxCnt) outb(0xFF, ioaddr + AM2150_RCV_NEXT); /* skip to next frame */ continue; } else { - DEBUG(1, "%s: couldn't allocate a sk_buff of size" + pr_debug("%s: couldn't allocate a sk_buff of size" " %d.\n", dev->name, pkt_len); lp->linux_stats.rx_dropped++; } @@ -1219,28 +1191,28 @@ pr_linux_stats ---------------------------------------------------------------------------- */ static void pr_linux_stats(struct net_device_stats *pstats) { - DEBUG(2, "pr_linux_stats\n"); - DEBUG(2, " rx_packets=%-7ld tx_packets=%ld\n", + pr_debug("pr_linux_stats\n"); + pr_debug(" rx_packets=%-7ld tx_packets=%ld\n", (long)pstats->rx_packets, (long)pstats->tx_packets); - DEBUG(2, " rx_errors=%-7ld tx_errors=%ld\n", + pr_debug(" rx_errors=%-7ld tx_errors=%ld\n", (long)pstats->rx_errors, (long)pstats->tx_errors); - DEBUG(2, " rx_dropped=%-7ld tx_dropped=%ld\n", + pr_debug(" rx_dropped=%-7ld tx_dropped=%ld\n", (long)pstats->rx_dropped, (long)pstats->tx_dropped); - DEBUG(2, " multicast=%-7ld collisions=%ld\n", + pr_debug(" multicast=%-7ld collisions=%ld\n", (long)pstats->multicast, (long)pstats->collisions); - DEBUG(2, " rx_length_errors=%-7ld rx_over_errors=%ld\n", + pr_debug(" rx_length_errors=%-7ld rx_over_errors=%ld\n", (long)pstats->rx_length_errors, (long)pstats->rx_over_errors); - DEBUG(2, " rx_crc_errors=%-7ld rx_frame_errors=%ld\n", + pr_debug(" rx_crc_errors=%-7ld rx_frame_errors=%ld\n", (long)pstats->rx_crc_errors, (long)pstats->rx_frame_errors); - DEBUG(2, " rx_fifo_errors=%-7ld rx_missed_errors=%ld\n", + pr_debug(" rx_fifo_errors=%-7ld rx_missed_errors=%ld\n", (long)pstats->rx_fifo_errors, (long)pstats->rx_missed_errors); - DEBUG(2, " tx_aborted_errors=%-7ld tx_carrier_errors=%ld\n", + pr_debug(" tx_aborted_errors=%-7ld tx_carrier_errors=%ld\n", (long)pstats->tx_aborted_errors, (long)pstats->tx_carrier_errors); - DEBUG(2, " tx_fifo_errors=%-7ld tx_heartbeat_errors=%ld\n", + pr_debug(" tx_fifo_errors=%-7ld tx_heartbeat_errors=%ld\n", (long)pstats->tx_fifo_errors, (long)pstats->tx_heartbeat_errors); - DEBUG(2, " tx_window_errors=%ld\n", + pr_debug(" tx_window_errors=%ld\n", (long)pstats->tx_window_errors); } /* pr_linux_stats */ @@ -1249,48 +1221,48 @@ pr_mace_stats ---------------------------------------------------------------------------- */ static void pr_mace_stats(mace_statistics *pstats) { - DEBUG(2, "pr_mace_stats\n"); + pr_debug("pr_mace_stats\n"); - DEBUG(2, " xmtsv=%-7d uflo=%d\n", + pr_debug(" xmtsv=%-7d uflo=%d\n", pstats->xmtsv, pstats->uflo); - DEBUG(2, " lcol=%-7d more=%d\n", + pr_debug(" lcol=%-7d more=%d\n", pstats->lcol, pstats->more); - DEBUG(2, " one=%-7d defer=%d\n", + pr_debug(" one=%-7d defer=%d\n", pstats->one, pstats->defer); - DEBUG(2, " lcar=%-7d rtry=%d\n", + pr_debug(" lcar=%-7d rtry=%d\n", pstats->lcar, pstats->rtry); /* MACE_XMTRC */ - DEBUG(2, " exdef=%-7d xmtrc=%d\n", + pr_debug(" exdef=%-7d xmtrc=%d\n", pstats->exdef, pstats->xmtrc); /* RFS1--Receive Status (RCVSTS) */ - DEBUG(2, " oflo=%-7d clsn=%d\n", + pr_debug(" oflo=%-7d clsn=%d\n", pstats->oflo, pstats->clsn); - DEBUG(2, " fram=%-7d fcs=%d\n", + pr_debug(" fram=%-7d fcs=%d\n", pstats->fram, pstats->fcs); /* RFS2--Runt Packet Count (RNTPC) */ /* RFS3--Receive Collision Count (RCVCC) */ - DEBUG(2, " rfs_rntpc=%-7d rfs_rcvcc=%d\n", + pr_debug(" rfs_rntpc=%-7d rfs_rcvcc=%d\n", pstats->rfs_rntpc, pstats->rfs_rcvcc); /* MACE_IR */ - DEBUG(2, " jab=%-7d babl=%d\n", + pr_debug(" jab=%-7d babl=%d\n", pstats->jab, pstats->babl); - DEBUG(2, " cerr=%-7d rcvcco=%d\n", + pr_debug(" cerr=%-7d rcvcco=%d\n", pstats->cerr, pstats->rcvcco); - DEBUG(2, " rntpco=%-7d mpco=%d\n", + pr_debug(" rntpco=%-7d mpco=%d\n", pstats->rntpco, pstats->mpco); /* MACE_MPC */ - DEBUG(2, " mpc=%d\n", pstats->mpc); + pr_debug(" mpc=%d\n", pstats->mpc); /* MACE_RNTPC */ - DEBUG(2, " rntpc=%d\n", pstats->rntpc); + pr_debug(" rntpc=%d\n", pstats->rntpc); /* MACE_RCVCC */ - DEBUG(2, " rcvcc=%d\n", pstats->rcvcc); + pr_debug(" rcvcc=%d\n", pstats->rcvcc); } /* pr_mace_stats */ @@ -1359,7 +1331,7 @@ static struct net_device_stats *mace_get_stats(struct net_device *dev) update_stats(dev->base_addr, dev); - DEBUG(1, "%s: updating the statistics.\n", dev->name); + pr_debug("%s: updating the statistics.\n", dev->name); pr_linux_stats(&lp->linux_stats); pr_mace_stats(&lp->mace_stats); @@ -1426,7 +1398,7 @@ static void BuildLAF(int *ladrf, int *adr) ladrf[byte] |= (1 << (hashcode & 7)); #ifdef PCMCIA_DEBUG - if (pc_debug > 2) + if (0) printk(KERN_DEBUG " adr =%pM\n", adr); printk(KERN_DEBUG " hashcode = %d(decimal), ladrf[0:63] =", hashcode); for (i = 0; i < 8; i++) @@ -1453,12 +1425,12 @@ static void restore_multicast_list(struct net_device *dev) unsigned int ioaddr = dev->base_addr; int i; - DEBUG(2, "%s: restoring Rx mode to %d addresses.\n", + pr_debug("%s: restoring Rx mode to %d addresses.\n", dev->name, num_addrs); if (num_addrs > 0) { - DEBUG(1, "Attempt to restore multicast list detected.\n"); + pr_debug("Attempt to restore multicast list detected.\n"); mace_write(lp, ioaddr, MACE_IAC, MACE_IAC_ADDRCHG | MACE_IAC_LOGADDR); /* Poll ADDRCHG bit */ @@ -1510,11 +1482,11 @@ static void set_multicast_list(struct net_device *dev) struct dev_mc_list *dmi = dev->mc_list; #ifdef PCMCIA_DEBUG - if (pc_debug > 1) { + { static int old; if (dev->mc_count != old) { old = dev->mc_count; - DEBUG(0, "%s: setting Rx mode to %d addresses.\n", + pr_debug("%s: setting Rx mode to %d addresses.\n", dev->name, old); } } @@ -1545,7 +1517,7 @@ static void restore_multicast_list(struct net_device *dev) unsigned int ioaddr = dev->base_addr; mace_private *lp = netdev_priv(dev); - DEBUG(2, "%s: restoring Rx mode to %d addresses.\n", dev->name, + pr_debug("%s: restoring Rx mode to %d addresses.\n", dev->name, lp->multicast_num_addrs); if (dev->flags & IFF_PROMISC) { @@ -1566,11 +1538,11 @@ static void set_multicast_list(struct net_device *dev) mace_private *lp = netdev_priv(dev); #ifdef PCMCIA_DEBUG - if (pc_debug > 1) { + { static int old; if (dev->mc_count != old) { old = dev->mc_count; - DEBUG(0, "%s: setting Rx mode to %d addresses.\n", + pr_debug("%s: setting Rx mode to %d addresses.\n", dev->name, old); } } diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index bd3447f..80ab9de 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -71,15 +71,6 @@ static const char *if_names[] = { "auto", "10baseT", "10base2"}; -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"pcnet_cs.c 1.153 2003/11/09 18:53:09 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -265,7 +256,7 @@ static int pcnet_probe(struct pcmcia_device *link) pcnet_dev_t *info; struct net_device *dev; - DEBUG(0, "pcnet_attach()\n"); + dev_dbg(&link->dev, "pcnet_attach()\n"); /* Create new ethernet device */ dev = __alloc_ei_netdev(sizeof(pcnet_dev_t)); @@ -297,7 +288,7 @@ static void pcnet_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "pcnet_detach(0x%p)\n", link); + dev_dbg(&link->dev, "pcnet_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -327,10 +318,8 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) req.Base = 0; req.Size = 0; req.AccessSpeed = 0; i = pcmcia_request_window(&link, &req, &link->win); - if (i != 0) { - cs_error(link, RequestWindow, i); + if (i != 0) return NULL; - } virt = ioremap(req.Base, req.Size); mem.Page = 0; @@ -349,8 +338,6 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) iounmap(virt); j = pcmcia_release_window(link->win); - if (j != 0) - cs_error(link, ReleaseWindow, j); return (i < NR_INFO) ? hw_info+i : NULL; } /* get_hwinfo */ @@ -495,9 +482,6 @@ static hw_info_t *get_hwired(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int try_io_port(struct pcmcia_device *link) { int j, ret; @@ -567,19 +551,19 @@ static int pcnet_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; pcnet_dev_t *info = PRIV(dev); - int last_ret, last_fn, start_pg, stop_pg, cm_offset; + int ret, start_pg, stop_pg, cm_offset; int has_shmem = 0; hw_info_t *local_hw_info; - DEBUG(0, "pcnet_config(0x%p)\n", link); + dev_dbg(&link->dev, "pcnet_config\n"); - last_ret = pcmcia_loop_config(link, pcnet_confcheck, &has_shmem); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, pcnet_confcheck, &has_shmem); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; if (link->io.NumPorts2 == 8) { link->conf.Attributes |= CONF_ENABLE_SPKR; @@ -589,7 +573,9 @@ static int pcnet_config(struct pcmcia_device *link) (link->card_id == PRODID_IBM_HOME_AND_AWAY)) link->conf.ConfigIndex |= 0x10; - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; if (info->flags & HAS_MISC_REG) { @@ -687,8 +673,6 @@ static int pcnet_config(struct pcmcia_device *link) printk(" hw_addr %pM\n", dev->dev_addr); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: pcnet_release(link); return -ENODEV; @@ -706,7 +690,7 @@ static void pcnet_release(struct pcmcia_device *link) { pcnet_dev_t *info = PRIV(link->priv); - DEBUG(0, "pcnet_release(0x%p)\n", link); + dev_dbg(&link->dev, "pcnet_release\n"); if (info->flags & USE_SHMEM) iounmap(info->base); @@ -960,7 +944,7 @@ static void mii_phy_probe(struct net_device *dev) phyid = tmp << 16; phyid |= mdio_read(mii_addr, i, MII_PHYID_REG2); phyid &= MII_PHYID_REV_MASK; - DEBUG(0, "%s: MII at %d is 0x%08x\n", dev->name, i, phyid); + pr_debug("%s: MII at %d is 0x%08x\n", dev->name, i, phyid); if (phyid == AM79C9XX_HOME_PHY) { info->pna_phy = i; } else if (phyid != AM79C9XX_ETH_PHY) { @@ -976,7 +960,7 @@ static int pcnet_open(struct net_device *dev) struct pcmcia_device *link = info->p_dev; unsigned int nic_base = dev->base_addr; - DEBUG(2, "pcnet_open('%s')\n", dev->name); + dev_dbg(&link->dev, "pcnet_open('%s')\n", dev->name); if (!pcmcia_dev_present(link)) return -ENODEV; @@ -1008,7 +992,7 @@ static int pcnet_close(struct net_device *dev) pcnet_dev_t *info = PRIV(dev); struct pcmcia_device *link = info->p_dev; - DEBUG(2, "pcnet_close('%s')\n", dev->name); + dev_dbg(&link->dev, "pcnet_close('%s')\n", dev->name); ei_close(dev); free_irq(dev->irq, dev); @@ -1251,10 +1235,8 @@ static void dma_block_input(struct net_device *dev, int count, int xfer_count = count; char *buf = skb->data; -#ifdef PCMCIA_DEBUG if ((ei_debug > 4) && (count != 4)) - printk(KERN_DEBUG "%s: [bi=%d]\n", dev->name, count+4); -#endif + pr_debug("%s: [bi=%d]\n", dev->name, count+4); if (ei_status.dmaing) { printk(KERN_NOTICE "%s: DMAing conflict in dma_block_input." "[DMAstat:%1x][irqlock:%1x]\n", @@ -1495,7 +1477,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, pcnet_dev_t *info = PRIV(dev); win_req_t req; memreq_t mem; - int i, window_size, offset, last_ret, last_fn; + int i, window_size, offset, ret; window_size = (stop_pg - start_pg) << 8; if (window_size > 32 * 1024) @@ -1509,13 +1491,17 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, req.Attributes |= WIN_USE_WAIT; req.Base = 0; req.Size = window_size; req.AccessSpeed = mem_speed; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &link->win)); + ret = pcmcia_request_window(&link, &req, &link->win); + if (ret) + goto failed; mem.CardOffset = (start_pg << 8) + cm_offset; offset = mem.CardOffset % window_size; mem.CardOffset -= offset; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(link->win, &mem)); + ret = pcmcia_map_mem_page(link->win, &mem); + if (ret) + goto failed; /* Try scribbling on the buffer */ info->base = ioremap(req.Base, window_size); @@ -1549,8 +1535,6 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, info->flags |= USE_SHMEM; return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: return 1; } @@ -1788,7 +1772,6 @@ static int __init init_pcnet_cs(void) static void __exit exit_pcnet_cs(void) { - DEBUG(0, "pcnet_cs: unloading\n"); pcmcia_unregister_driver(&pcnet_driver); } diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index df92bcd..580ec44 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -79,14 +79,6 @@ MODULE_FIRMWARE(FIRMWARE_NAME); */ INT_MODULE_PARM(if_port, 0); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -static const char *version = -"smc91c92_cs.c 1.123 2006/11/09 Donald Becker, becker@scyld.com.\n"; -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -#else -#define DEBUG(n, args...) -#endif #define DRV_NAME "smc91c92_cs" #define DRV_VERSION "1.123" @@ -323,7 +315,7 @@ static int smc91c92_probe(struct pcmcia_device *link) struct smc_private *smc; struct net_device *dev; - DEBUG(0, "smc91c92_attach()\n"); + dev_dbg(&link->dev, "smc91c92_attach()\n"); /* Create new ethernet device */ dev = alloc_etherdev(sizeof(struct smc_private)); @@ -371,7 +363,7 @@ static void smc91c92_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "smc91c92_detach(0x%p)\n", link); + dev_dbg(&link->dev, "smc91c92_detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -746,7 +738,7 @@ static int osi_setup(struct pcmcia_device *link, u_short manfid, u_short cardid) set_bits(0x300, link->io.BasePort1 + OSITECH_AUI_PWR); /* Now, turn on the interrupt for both card functions */ set_bits(0x300, link->io.BasePort1 + OSITECH_RESET_ISR); - DEBUG(2, "AUI/PWR: %4.4x RESET/ISR: %4.4x\n", + dev_dbg(&link->dev, "AUI/PWR: %4.4x RESET/ISR: %4.4x\n", inw(link->io.BasePort1 + OSITECH_AUI_PWR), inw(link->io.BasePort1 + OSITECH_RESET_ISR)); } @@ -860,12 +852,6 @@ static int check_sig(struct pcmcia_device *link) ======================================================================*/ -#define CS_EXIT_TEST(ret, svc, label) \ -if (ret != 0) { \ - cs_error(link, svc, ret); \ - goto label; \ -} - static int smc91c92_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; @@ -875,7 +861,7 @@ static int smc91c92_config(struct pcmcia_device *link) unsigned int ioaddr; u_long mir; - DEBUG(0, "smc91c92_config(0x%p)\n", link); + dev_dbg(&link->dev, "smc91c92_config\n"); smc->manfid = link->manf_id; smc->cardid = link->card_id; @@ -891,12 +877,15 @@ static int smc91c92_config(struct pcmcia_device *link) } else { i = smc_config(link); } - CS_EXIT_TEST(i, RequestIO, config_failed); + if (i) + goto config_failed; i = pcmcia_request_irq(link, &link->irq); - CS_EXIT_TEST(i, RequestIRQ, config_failed); + if (i) + goto config_failed; i = pcmcia_request_configuration(link, &link->conf); - CS_EXIT_TEST(i, RequestConfiguration, config_failed); + if (i) + goto config_failed; if (smc->manfid == MANFID_MOTOROLA) mot_config(link); @@ -1001,7 +990,7 @@ static int smc91c92_config(struct pcmcia_device *link) if (smc->cfg & CFG_MII_SELECT) { if (smc->mii_if.phy_id != -1) { - DEBUG(0, " MII transceiver at index %d, status %x.\n", + dev_dbg(&link->dev, " MII transceiver at index %d, status %x.\n", smc->mii_if.phy_id, j); } else { printk(KERN_NOTICE " No MII transceivers found!\n"); @@ -1011,7 +1000,7 @@ static int smc91c92_config(struct pcmcia_device *link) config_undo: unregister_netdev(dev); -config_failed: /* CS_EXIT_TEST() calls jump to here... */ +config_failed: smc91c92_release(link); return -ENODEV; } /* smc91c92_config */ @@ -1026,7 +1015,7 @@ config_failed: /* CS_EXIT_TEST() calls jump to here... */ static void smc91c92_release(struct pcmcia_device *link) { - DEBUG(0, "smc91c92_release(0x%p)\n", link); + dev_dbg(&link->dev, "smc91c92_release\n"); if (link->win) { struct net_device *dev = link->priv; struct smc_private *smc = netdev_priv(dev); @@ -1123,10 +1112,10 @@ static int smc_open(struct net_device *dev) struct smc_private *smc = netdev_priv(dev); struct pcmcia_device *link = smc->p_dev; -#ifdef PCMCIA_DEBUG - DEBUG(0, "%s: smc_open(%p), ID/Window %4.4x.\n", + dev_dbg(&link->dev, "%s: smc_open(%p), ID/Window %4.4x.\n", dev->name, dev, inw(dev->base_addr + BANK_SELECT)); - if (pc_debug > 1) smc_dump(dev); +#ifdef PCMCIA_DEBUG + smc_dump(dev); #endif /* Check that the PCMCIA card is still here. */ @@ -1161,7 +1150,7 @@ static int smc_close(struct net_device *dev) struct pcmcia_device *link = smc->p_dev; unsigned int ioaddr = dev->base_addr; - DEBUG(0, "%s: smc_close(), status %4.4x.\n", + dev_dbg(&link->dev, "%s: smc_close(), status %4.4x.\n", dev->name, inw(ioaddr + BANK_SELECT)); netif_stop_queue(dev); @@ -1228,7 +1217,7 @@ static void smc_hardware_send_packet(struct net_device * dev) u_char *buf = skb->data; u_int length = skb->len; /* The chip will pad to ethernet min. */ - DEBUG(2, "%s: Trying to xmit packet of length %d.\n", + pr_debug("%s: Trying to xmit packet of length %d.\n", dev->name, length); /* send the packet length: +6 for status word, length, and ctl */ @@ -1283,7 +1272,7 @@ static netdev_tx_t smc_start_xmit(struct sk_buff *skb, netif_stop_queue(dev); - DEBUG(2, "%s: smc_start_xmit(length = %d) called," + pr_debug("%s: smc_start_xmit(length = %d) called," " status %4.4x.\n", dev->name, skb->len, inw(ioaddr + 2)); if (smc->saved_skb) { @@ -1330,7 +1319,7 @@ static netdev_tx_t smc_start_xmit(struct sk_buff *skb, } /* Otherwise defer until the Tx-space-allocated interrupt. */ - DEBUG(2, "%s: memory allocation deferred.\n", dev->name); + pr_debug("%s: memory allocation deferred.\n", dev->name); outw((IM_ALLOC_INT << 8) | (ir & 0xff00), ioaddr + INTERRUPT); spin_unlock_irqrestore(&smc->lock, flags); @@ -1395,7 +1384,7 @@ static void smc_eph_irq(struct net_device *dev) SMC_SELECT_BANK(0); ephs = inw(ioaddr + EPH); - DEBUG(2, "%s: Ethernet protocol handler interrupt, status" + pr_debug("%s: Ethernet protocol handler interrupt, status" " %4.4x.\n", dev->name, ephs); /* Could be a counter roll-over warning: update stats. */ card_stats = inw(ioaddr + COUNTER); @@ -1435,7 +1424,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) ioaddr = dev->base_addr; - DEBUG(3, "%s: SMC91c92 interrupt %d at %#x.\n", dev->name, + pr_debug("%s: SMC91c92 interrupt %d at %#x.\n", dev->name, irq, ioaddr); spin_lock(&smc->lock); @@ -1444,7 +1433,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) if ((saved_bank & 0xff00) != 0x3300) { /* The device does not exist -- the card could be off-line, or maybe it has been ejected. */ - DEBUG(1, "%s: SMC91c92 interrupt %d for non-existent" + pr_debug("%s: SMC91c92 interrupt %d for non-existent" "/ejected device.\n", dev->name, irq); handled = 0; goto irq_done; @@ -1458,7 +1447,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) do { /* read the status flag, and mask it */ status = inw(ioaddr + INTERRUPT) & 0xff; - DEBUG(3, "%s: Status is %#2.2x (mask %#2.2x).\n", dev->name, + pr_debug("%s: Status is %#2.2x (mask %#2.2x).\n", dev->name, status, mask); if ((status & mask) == 0) { if (bogus_cnt == INTR_WORK) @@ -1503,7 +1492,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) smc_eph_irq(dev); } while (--bogus_cnt); - DEBUG(3, " Restoring saved registers mask %2.2x bank %4.4x" + pr_debug(" Restoring saved registers mask %2.2x bank %4.4x" " pointer %4.4x.\n", mask, saved_bank, saved_pointer); /* restore state register */ @@ -1511,7 +1500,7 @@ static irqreturn_t smc_interrupt(int irq, void *dev_id) outw(saved_pointer, ioaddr + POINTER); SMC_SELECT_BANK(saved_bank); - DEBUG(3, "%s: Exiting interrupt IRQ%d.\n", dev->name, irq); + pr_debug("%s: Exiting interrupt IRQ%d.\n", dev->name, irq); irq_done: @@ -1562,7 +1551,7 @@ static void smc_rx(struct net_device *dev) rx_status = inw(ioaddr + DATA_1); packet_length = inw(ioaddr + DATA_1) & 0x07ff; - DEBUG(2, "%s: Receive status %4.4x length %d.\n", + pr_debug("%s: Receive status %4.4x length %d.\n", dev->name, rx_status, packet_length); if (!(rx_status & RS_ERRORS)) { @@ -1573,7 +1562,7 @@ static void smc_rx(struct net_device *dev) skb = dev_alloc_skb(packet_length+2); if (skb == NULL) { - DEBUG(1, "%s: Low memory, packet dropped.\n", dev->name); + pr_debug("%s: Low memory, packet dropped.\n", dev->name); dev->stats.rx_dropped++; outw(MC_RELEASE, ioaddr + MMU_CMD); return; @@ -1733,7 +1722,7 @@ static void smc_reset(struct net_device *dev) struct smc_private *smc = netdev_priv(dev); int i; - DEBUG(0, "%s: smc91c92 reset called.\n", dev->name); + pr_debug("%s: smc91c92 reset called.\n", dev->name); /* The first interaction must be a write to bring the chip out of sleep mode. */ @@ -2050,18 +2039,6 @@ static u32 smc_get_link(struct net_device *dev) return ret; } -#ifdef PCMCIA_DEBUG -static u32 smc_get_msglevel(struct net_device *dev) -{ - return pc_debug; -} - -static void smc_set_msglevel(struct net_device *dev, u32 val) -{ - pc_debug = val; -} -#endif - static int smc_nway_reset(struct net_device *dev) { struct smc_private *smc = netdev_priv(dev); @@ -2085,10 +2062,6 @@ static const struct ethtool_ops ethtool_ops = { .get_settings = smc_get_settings, .set_settings = smc_set_settings, .get_link = smc_get_link, -#ifdef PCMCIA_DEBUG - .get_msglevel = smc_get_msglevel, - .set_msglevel = smc_set_msglevel, -#endif .nway_reset = smc_nway_reset, }; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 925e850..fbf9265 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -211,20 +211,6 @@ enum xirc_cmd { /* Commands */ static const char *if_names[] = { "Auto", "10BaseT", "10Base2", "AUI", "100BaseT" }; -/**************** - * All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - * you do not define PCMCIA_DEBUG at all, all the debug code will be - * left out. If you compile with PCMCIA_DEBUG=0, the debug code will - * be present but disabled -- but it can then be enabled for specific - * modules at load time with a 'pc_debug=#' option to insmod. - */ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KDBG_XIRC args) -#else -#define DEBUG(n, args...) -#endif #define KDBG_XIRC KERN_DEBUG "xirc2ps_cs: " #define KERR_XIRC KERN_ERR "xirc2ps_cs: " @@ -378,7 +364,7 @@ static int do_stop(struct net_device *dev); #define PutWord(reg,value) outw((value), ioaddr+(reg)) /*====== Functions used for debugging =================================*/ -#if defined(PCMCIA_DEBUG) && 0 /* reading regs may change system status */ +#if 0 /* reading regs may change system status */ static void PrintRegisters(struct net_device *dev) { @@ -410,7 +396,7 @@ PrintRegisters(struct net_device *dev) } } } -#endif /* PCMCIA_DEBUG */ +#endif /* 0 */ /*============== MII Management functions ===============*/ @@ -554,7 +540,7 @@ xirc2ps_probe(struct pcmcia_device *link) struct net_device *dev; local_info_t *local; - DEBUG(0, "attach()\n"); + dev_dbg(&link->dev, "attach()\n"); /* Allocate the device structure */ dev = alloc_etherdev(sizeof(local_info_t)); @@ -593,7 +579,7 @@ xirc2ps_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "detach(0x%p)\n", link); + dev_dbg(&link->dev, "detach\n"); if (link->dev_node) unregister_netdev(dev); @@ -640,7 +626,7 @@ set_card_type(struct pcmcia_device *link) mediaid = buf[3]; prodid = buf[4]; - DEBUG(0, "cisrev=%02x mediaid=%02x prodid=%02x\n", + dev_dbg(&link->dev, "cisrev=%02x mediaid=%02x prodid=%02x\n", cisrev, mediaid, prodid); local->mohawk = 0; @@ -784,7 +770,7 @@ xirc2ps_config(struct pcmcia_device * link) local->dingo_ccr = NULL; - DEBUG(0, "config(0x%p)\n", link); + dev_dbg(&link->dev, "config\n"); /* Is this a valid card */ if (link->has_manf_id == 0) { @@ -814,7 +800,7 @@ xirc2ps_config(struct pcmcia_device * link) (unsigned)link->manf_id); goto failure; } - DEBUG(0, "found %s card\n", local->manf_str); + dev_dbg(&link->dev, "found %s card\n", local->manf_str); if (!set_card_type(link)) { printk(KNOT_XIRC "this card is not supported\n"); @@ -891,10 +877,8 @@ xirc2ps_config(struct pcmcia_device * link) goto port_found; } link->io.BasePort1 = 0; /* let CS decide */ - if ((err=pcmcia_request_io(link, &link->io))) { - cs_error(link, RequestIO, err); + if ((err=pcmcia_request_io(link, &link->io))) goto config_error; - } } port_found: if (err) @@ -904,19 +888,15 @@ xirc2ps_config(struct pcmcia_device * link) * Now allocate an interrupt line. Note that this does not * actually assign a handler to the interrupt. */ - if ((err=pcmcia_request_irq(link, &link->irq))) { - cs_error(link, RequestIRQ, err); + if ((err=pcmcia_request_irq(link, &link->irq))) goto config_error; - } /**************** * This actually configures the PCMCIA socket -- setting up * the I/O windows and the interrupt mapping. */ - if ((err=pcmcia_request_configuration(link, &link->conf))) { - cs_error(link, RequestConfiguration, err); + if ((err=pcmcia_request_configuration(link, &link->conf))) goto config_error; - } if (local->dingo) { conf_reg_t reg; @@ -931,17 +911,13 @@ xirc2ps_config(struct pcmcia_device * link) reg.Action = CS_WRITE; reg.Offset = CISREG_IOBASE_0; reg.Value = link->io.BasePort2 & 0xff; - if ((err = pcmcia_access_configuration_register(link, ®))) { - cs_error(link, AccessConfigurationRegister, err); + if ((err = pcmcia_access_configuration_register(link, ®))) goto config_error; - } reg.Action = CS_WRITE; reg.Offset = CISREG_IOBASE_1; reg.Value = (link->io.BasePort2 >> 8) & 0xff; - if ((err = pcmcia_access_configuration_register(link, ®))) { - cs_error(link, AccessConfigurationRegister, err); + if ((err = pcmcia_access_configuration_register(link, ®))) goto config_error; - } /* There is no config entry for the Ethernet part which * is at 0x0800. So we allocate a window into the attribute @@ -950,17 +926,14 @@ xirc2ps_config(struct pcmcia_device * link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = 0; - if ((err = pcmcia_request_window(&link, &req, &link->win))) { - cs_error(link, RequestWindow, err); + if ((err = pcmcia_request_window(&link, &req, &link->win))) goto config_error; - } + local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800; mem.CardOffset = 0x0; mem.Page = 0; - if ((err = pcmcia_map_mem_page(link->win, &mem))) { - cs_error(link, MapMemPage, err); + if ((err = pcmcia_map_mem_page(link->win, &mem))) goto config_error; - } /* Setup the CCRs; there are no infos in the CIS about the Ethernet * part. @@ -1052,7 +1025,7 @@ xirc2ps_config(struct pcmcia_device * link) static void xirc2ps_release(struct pcmcia_device *link) { - DEBUG(0, "release(0x%p)\n", link); + dev_dbg(&link->dev, "release\n"); if (link->win) { struct net_device *dev = link->priv; @@ -1119,7 +1092,7 @@ xirc2ps_interrupt(int irq, void *dev_id) PutByte(XIRCREG_CR, 0); } - DEBUG(6, "%s: interrupt %d at %#x.\n", dev->name, irq, ioaddr); + pr_debug("%s: interrupt %d at %#x.\n", dev->name, irq, ioaddr); saved_page = GetByte(XIRCREG_PR); /* Read the ISR to see whats the cause for the interrupt. @@ -1129,7 +1102,7 @@ xirc2ps_interrupt(int irq, void *dev_id) bytes_rcvd = 0; loop_entry: if (int_status == 0xff) { /* card may be ejected */ - DEBUG(3, "%s: interrupt %d for dead card\n", dev->name, irq); + pr_debug("%s: interrupt %d for dead card\n", dev->name, irq); goto leave; } eth_status = GetByte(XIRCREG_ESR); @@ -1142,7 +1115,7 @@ xirc2ps_interrupt(int irq, void *dev_id) PutByte(XIRCREG40_TXST0, 0); PutByte(XIRCREG40_TXST1, 0); - DEBUG(3, "%s: ISR=%#2.2x ESR=%#2.2x RSR=%#2.2x TSR=%#4.4x\n", + pr_debug("%s: ISR=%#2.2x ESR=%#2.2x RSR=%#2.2x TSR=%#4.4x\n", dev->name, int_status, eth_status, rx_status, tx_status); /***** receive section ******/ @@ -1153,14 +1126,14 @@ xirc2ps_interrupt(int irq, void *dev_id) /* too many bytes received during this int, drop the rest of the * packets */ dev->stats.rx_dropped++; - DEBUG(2, "%s: RX drop, too much done\n", dev->name); + pr_debug("%s: RX drop, too much done\n", dev->name); } else if (rsr & PktRxOk) { struct sk_buff *skb; pktlen = GetWord(XIRCREG0_RBC); bytes_rcvd += pktlen; - DEBUG(5, "rsr=%#02x packet_length=%u\n", rsr, pktlen); + pr_debug("rsr=%#02x packet_length=%u\n", rsr, pktlen); skb = dev_alloc_skb(pktlen+3); /* 1 extra so we can use insw */ if (!skb) { @@ -1228,19 +1201,19 @@ xirc2ps_interrupt(int irq, void *dev_id) dev->stats.multicast++; } } else { /* bad packet */ - DEBUG(5, "rsr=%#02x\n", rsr); + pr_debug("rsr=%#02x\n", rsr); } if (rsr & PktTooLong) { dev->stats.rx_frame_errors++; - DEBUG(3, "%s: Packet too long\n", dev->name); + pr_debug("%s: Packet too long\n", dev->name); } if (rsr & CRCErr) { dev->stats.rx_crc_errors++; - DEBUG(3, "%s: CRC error\n", dev->name); + pr_debug("%s: CRC error\n", dev->name); } if (rsr & AlignErr) { dev->stats.rx_fifo_errors++; /* okay ? */ - DEBUG(3, "%s: Alignment error\n", dev->name); + pr_debug("%s: Alignment error\n", dev->name); } /* clear the received/dropped/error packet */ @@ -1252,7 +1225,7 @@ xirc2ps_interrupt(int irq, void *dev_id) if (rx_status & 0x10) { /* Receive overrun */ dev->stats.rx_over_errors++; PutByte(XIRCREG_CR, ClearRxOvrun); - DEBUG(3, "receive overrun cleared\n"); + pr_debug("receive overrun cleared\n"); } /***** transmit section ******/ @@ -1265,13 +1238,13 @@ xirc2ps_interrupt(int irq, void *dev_id) if (nn < n) /* rollover */ dev->stats.tx_packets += 256 - n; else if (n == nn) { /* happens sometimes - don't know why */ - DEBUG(0, "PTR not changed?\n"); + pr_debug("PTR not changed?\n"); } else dev->stats.tx_packets += lp->last_ptr_value - n; netif_wake_queue(dev); } if (tx_status & 0x0002) { /* Execessive collissions */ - DEBUG(0, "tx restarted due to execssive collissions\n"); + pr_debug("tx restarted due to execssive collissions\n"); PutByte(XIRCREG_CR, RestartTx); /* restart transmitter process */ } if (tx_status & 0x0040) @@ -1290,14 +1263,14 @@ xirc2ps_interrupt(int irq, void *dev_id) maxrx_bytes = 2000; else if (maxrx_bytes > 22000) maxrx_bytes = 22000; - DEBUG(1, "set maxrx=%u (rcvd=%u ticks=%lu)\n", + pr_debug("set maxrx=%u (rcvd=%u ticks=%lu)\n", maxrx_bytes, bytes_rcvd, duration); } else if (!duration && maxrx_bytes < 22000) { /* now much faster */ maxrx_bytes += 2000; if (maxrx_bytes > 22000) maxrx_bytes = 22000; - DEBUG(1, "set maxrx=%u\n", maxrx_bytes); + pr_debug("set maxrx=%u\n", maxrx_bytes); } } @@ -1347,7 +1320,7 @@ do_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned freespace; unsigned pktlen = skb->len; - DEBUG(1, "do_start_xmit(skb=%p, dev=%p) len=%u\n", + pr_debug("do_start_xmit(skb=%p, dev=%p) len=%u\n", skb, dev, pktlen); @@ -1373,7 +1346,7 @@ do_start_xmit(struct sk_buff *skb, struct net_device *dev) freespace &= 0x7fff; /* TRS doesn't work - (indeed it is eliminated with sil-rev 1) */ okay = pktlen +2 < freespace; - DEBUG(2 + (okay ? 2 : 0), "%s: avail. tx space=%u%s\n", + pr_debug("%s: avail. tx space=%u%s\n", dev->name, freespace, okay ? " (okay)":" (not enough)"); if (!okay) { /* not enough space */ return NETDEV_TX_BUSY; /* upper layer may decide to requeue this packet */ @@ -1475,7 +1448,7 @@ do_config(struct net_device *dev, struct ifmap *map) { local_info_t *local = netdev_priv(dev); - DEBUG(0, "do_config(%p)\n", dev); + pr_debug("do_config(%p)\n", dev); if (map->port != 255 && map->port != dev->if_port) { if (map->port > 4) return -EINVAL; @@ -1502,7 +1475,7 @@ do_open(struct net_device *dev) local_info_t *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(0, "do_open(%p)\n", dev); + dev_dbg(&link->dev, "do_open(%p)\n", dev); /* Check that the PCMCIA card is still here. */ /* Physical device present signature. */ @@ -1536,7 +1509,7 @@ do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) unsigned int ioaddr = dev->base_addr; struct mii_ioctl_data *data = if_mii(rq); - DEBUG(1, "%s: ioctl(%-.6s, %#04x) %04x %04x %04x %04x\n", + pr_debug("%s: ioctl(%-.6s, %#04x) %04x %04x %04x %04x\n", dev->name, rq->ifr_ifrn.ifrn_name, cmd, data->phy_id, data->reg_num, data->val_in, data->val_out); @@ -1585,7 +1558,7 @@ do_reset(struct net_device *dev, int full) unsigned int ioaddr = dev->base_addr; unsigned value; - DEBUG(0, "%s: do_reset(%p,%d)\n", dev? dev->name:"eth?", dev, full); + pr_debug("%s: do_reset(%p,%d)\n", dev? dev->name:"eth?", dev, full); hardreset(dev); PutByte(XIRCREG_CR, SoftReset); /* set */ @@ -1623,8 +1596,8 @@ do_reset(struct net_device *dev, int full) } msleep(40); /* wait 40 msec to let it complete */ - #ifdef PCMCIA_DEBUG - if (pc_debug) { + #if 0 + { SelectPage(0); value = GetByte(XIRCREG_ESR); /* read the ESR */ printk(KERN_DEBUG "%s: ESR is: %#02x\n", dev->name, value); @@ -1641,7 +1614,7 @@ do_reset(struct net_device *dev, int full) value |= DisableLinkPulse; PutByte(XIRCREG1_ECR, value); #endif - DEBUG(0, "%s: ECR is: %#02x\n", dev->name, value); + pr_debug("%s: ECR is: %#02x\n", dev->name, value); SelectPage(0x42); PutByte(XIRCREG42_SWC0, 0x20); /* disable source insertion */ @@ -1819,7 +1792,7 @@ do_powerdown(struct net_device *dev) unsigned int ioaddr = dev->base_addr; - DEBUG(0, "do_powerdown(%p)\n", dev); + pr_debug("do_powerdown(%p)\n", dev); SelectPage(4); PutByte(XIRCREG4_GPR1, 0); /* clear bit 0: power down */ @@ -1833,7 +1806,7 @@ do_stop(struct net_device *dev) local_info_t *lp = netdev_priv(dev); struct pcmcia_device *link = lp->p_dev; - DEBUG(0, "do_stop(%p)\n", dev); + dev_dbg(&link->dev, "do_stop(%p)\n", dev); if (!link) return -ENODEV; -- cgit v0.10.2 From 624dd66957e53e15cf40e937b50597c4d41f0e99 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:52:44 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (ray-cs.c) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: linux-wireless@vger.kernel.org CC: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 88cd58e..28db791 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -71,25 +71,7 @@ typedef u_char mac_addr[ETH_ALEN]; /* Hardware address */ #include "rayctl.h" #include "ray_cs.h" -/* All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef RAYLINK_DEBUG -#define PCMCIA_DEBUG RAYLINK_DEBUG -#endif -#ifdef PCMCIA_DEBUG -static int ray_debug; -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -/* #define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); */ -#define DEBUG(n, args...) if (pc_debug > (n)) printk(args); -#else -#define DEBUG(n, args...) -#endif /** Prototypes based on PCMCIA skeleton driver *******************************/ static int ray_config(struct pcmcia_device *link); static void ray_release(struct pcmcia_device *link); @@ -325,7 +307,7 @@ static int ray_probe(struct pcmcia_device *p_dev) ray_dev_t *local; struct net_device *dev; - DEBUG(1, "ray_attach()\n"); + dev_dbg(&p_dev->dev, "ray_attach()\n"); /* Allocate space for private device-specific data */ dev = alloc_etherdev(sizeof(ray_dev_t)); @@ -357,7 +339,7 @@ static int ray_probe(struct pcmcia_device *p_dev) local->card_status = CARD_INSERTED; local->authentication_state = UNAUTHENTICATED; local->num_multi = 0; - DEBUG(2, "ray_attach p_dev = %p, dev = %p, local = %p, intr = %p\n", + dev_dbg(&p_dev->dev, "ray_attach p_dev = %p, dev = %p, local = %p, intr = %p\n", p_dev, dev, local, &ray_interrupt); /* Raylink entries in the device structure */ @@ -370,7 +352,7 @@ static int ray_probe(struct pcmcia_device *p_dev) #endif /* WIRELESS_SPY */ - DEBUG(2, "ray_cs ray_attach calling ether_setup.)\n"); + dev_dbg(&p_dev->dev, "ray_cs ray_attach calling ether_setup.)\n"); netif_stop_queue(dev); init_timer(&local->timer); @@ -393,7 +375,7 @@ static void ray_detach(struct pcmcia_device *link) struct net_device *dev; ray_dev_t *local; - DEBUG(1, "ray_detach(0x%p)\n", link); + dev_dbg(&link->dev, "ray_detach\n"); this_device = NULL; dev = link->priv; @@ -408,7 +390,7 @@ static void ray_detach(struct pcmcia_device *link) unregister_netdev(dev); free_netdev(dev); } - DEBUG(2, "ray_cs ray_detach ending\n"); + dev_dbg(&link->dev, "ray_cs ray_detach ending\n"); } /* ray_detach */ /*============================================================================= @@ -416,19 +398,17 @@ static void ray_detach(struct pcmcia_device *link) is received, to configure the PCMCIA socket, and to make the ethernet device available to the system. =============================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) #define MAX_TUPLE_SIZE 128 static int ray_config(struct pcmcia_device *link) { - int last_fn = 0, last_ret = 0; + int ret = 0; int i; win_req_t req; memreq_t mem; struct net_device *dev = (struct net_device *)link->priv; ray_dev_t *local = netdev_priv(dev); - DEBUG(1, "ray_config(0x%p)\n", link); + dev_dbg(&link->dev, "ray_config\n"); /* Determine card type and firmware version */ printk(KERN_INFO "ray_cs Detected: %s%s%s%s\n", @@ -440,14 +420,17 @@ static int ray_config(struct pcmcia_device *link) /* Now allocate an interrupt line. Note that this does not actually assign a handler to the interrupt. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /*** Set up 32k window for shared memory (transmit and control) ************/ req.Attributes = @@ -455,10 +438,14 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x8000; req.AccessSpeed = ray_mem_speed; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &link->win)); + ret = pcmcia_request_window(&link, &req, &link->win); + if (ret) + goto failed; mem.CardOffset = 0x0000; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(link->win, &mem)); + ret = pcmcia_map_mem_page(link->win, &mem); + if (ret) + goto failed; local->sram = ioremap(req.Base, req.Size); /*** Set up 16k window for shared memory (receive buffer) ***************/ @@ -467,11 +454,14 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x4000; req.AccessSpeed = ray_mem_speed; - CS_CHECK(RequestWindow, - pcmcia_request_window(&link, &req, &local->rmem_handle)); + ret = pcmcia_request_window(&link, &req, &local->rmem_handle); + if (ret) + goto failed; mem.CardOffset = 0x8000; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(local->rmem_handle, &mem)); + ret = pcmcia_map_mem_page(local->rmem_handle, &mem); + if (ret) + goto failed; local->rmem = ioremap(req.Base, req.Size); /*** Set up window for attribute memory ***********************************/ @@ -480,16 +470,19 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x1000; req.AccessSpeed = ray_mem_speed; - CS_CHECK(RequestWindow, - pcmcia_request_window(&link, &req, &local->amem_handle)); + ret = pcmcia_request_window(&link, &req, &local->amem_handle); + if (ret) + goto failed; mem.CardOffset = 0x0000; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(local->amem_handle, &mem)); + ret = pcmcia_map_mem_page(local->amem_handle, &mem); + if (ret) + goto failed; local->amem = ioremap(req.Base, req.Size); - DEBUG(3, "ray_config sram=%p\n", local->sram); - DEBUG(3, "ray_config rmem=%p\n", local->rmem); - DEBUG(3, "ray_config amem=%p\n", local->amem); + dev_dbg(&link->dev, "ray_config sram=%p\n", local->sram); + dev_dbg(&link->dev, "ray_config rmem=%p\n", local->rmem); + dev_dbg(&link->dev, "ray_config amem=%p\n", local->amem); if (ray_init(dev) < 0) { ray_release(link); return -ENODEV; @@ -511,9 +504,7 @@ static int ray_config(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); - +failed: ray_release(link); return -ENODEV; } /* ray_config */ @@ -543,9 +534,9 @@ static int ray_init(struct net_device *dev) struct ccs __iomem *pccs; ray_dev_t *local = netdev_priv(dev); struct pcmcia_device *link = local->finder; - DEBUG(1, "ray_init(0x%p)\n", dev); + dev_dbg(&link->dev, "ray_init(0x%p)\n", dev); if (!(pcmcia_dev_present(link))) { - DEBUG(0, "ray_init - device not present\n"); + dev_dbg(&link->dev, "ray_init - device not present\n"); return -1; } @@ -567,13 +558,13 @@ static int ray_init(struct net_device *dev) local->fw_ver = local->startup_res.firmware_version[0]; local->fw_bld = local->startup_res.firmware_version[1]; local->fw_var = local->startup_res.firmware_version[2]; - DEBUG(1, "ray_init firmware version %d.%d \n", local->fw_ver, + dev_dbg(&link->dev, "ray_init firmware version %d.%d \n", local->fw_ver, local->fw_bld); local->tib_length = 0x20; if ((local->fw_ver == 5) && (local->fw_bld >= 30)) local->tib_length = local->startup_res.tib_length; - DEBUG(2, "ray_init tib_length = 0x%02x\n", local->tib_length); + dev_dbg(&link->dev, "ray_init tib_length = 0x%02x\n", local->tib_length); /* Initialize CCS's to buffer free state */ pccs = ccs_base(local); for (i = 0; i < NUMBER_OF_CCS; i++) { @@ -592,7 +583,7 @@ static int ray_init(struct net_device *dev) clear_interrupt(local); /* Clear any interrupt from the card */ local->card_status = CARD_AWAITING_PARAM; - DEBUG(2, "ray_init ending\n"); + dev_dbg(&link->dev, "ray_init ending\n"); return 0; } /* ray_init */ @@ -605,9 +596,9 @@ static int dl_startup_params(struct net_device *dev) struct ccs __iomem *pccs; struct pcmcia_device *link = local->finder; - DEBUG(1, "dl_startup_params entered\n"); + dev_dbg(&link->dev, "dl_startup_params entered\n"); if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs dl_startup_params - device not present\n"); + dev_dbg(&link->dev, "ray_cs dl_startup_params - device not present\n"); return -1; } @@ -625,7 +616,7 @@ static int dl_startup_params(struct net_device *dev) local->dl_param_ccs = ccsindex; pccs = ccs_base(local) + ccsindex; writeb(CCS_DOWNLOAD_STARTUP_PARAMS, &pccs->cmd); - DEBUG(2, "dl_startup_params start ccsindex = %d\n", + dev_dbg(&link->dev, "dl_startup_params start ccsindex = %d\n", local->dl_param_ccs); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { @@ -641,7 +632,7 @@ static int dl_startup_params(struct net_device *dev) local->timer.data = (long)local; local->timer.function = &verify_dl_startup; add_timer(&local->timer); - DEBUG(2, + dev_dbg(&link->dev, "ray_cs dl_startup_params started timer for verify_dl_startup\n"); return 0; } /* dl_startup_params */ @@ -717,11 +708,11 @@ static void verify_dl_startup(u_long data) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs verify_dl_startup - device not present\n"); + dev_dbg(&link->dev, "ray_cs verify_dl_startup - device not present\n"); return; } -#ifdef PCMCIA_DEBUG - if (pc_debug > 2) { +#if 0 + { int i; printk(KERN_DEBUG "verify_dl_startup parameters sent via ccs %d:\n", @@ -760,7 +751,7 @@ static void start_net(u_long data) int ccsindex; struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs start_net - device not present\n"); + dev_dbg(&link->dev, "ray_cs start_net - device not present\n"); return; } /* Fill in the CCS fields for the ECF */ @@ -771,7 +762,7 @@ static void start_net(u_long data) writeb(0, &pccs->var.start_network.update_param); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, "ray start net failed - card not ready for intr\n"); + dev_dbg(&link->dev, "ray start net failed - card not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); return; } @@ -790,7 +781,7 @@ static void join_net(u_long data) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs join_net - device not present\n"); + dev_dbg(&link->dev, "ray_cs join_net - device not present\n"); return; } /* Fill in the CCS fields for the ECF */ @@ -802,7 +793,7 @@ static void join_net(u_long data) writeb(0, &pccs->var.join_network.net_initiated); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, "ray join net failed - card not ready for intr\n"); + dev_dbg(&link->dev, "ray join net failed - card not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); return; } @@ -821,7 +812,7 @@ static void ray_release(struct pcmcia_device *link) ray_dev_t *local = netdev_priv(dev); int i; - DEBUG(1, "ray_release(0x%p)\n", link); + dev_dbg(&link->dev, "ray_release\n"); del_timer(&local->timer); @@ -831,13 +822,13 @@ static void ray_release(struct pcmcia_device *link) /* Do bother checking to see if these succeed or not */ i = pcmcia_release_window(local->amem_handle); if (i != 0) - DEBUG(0, "ReleaseWindow(local->amem) ret = %x\n", i); + dev_dbg(&link->dev, "ReleaseWindow(local->amem) ret = %x\n", i); i = pcmcia_release_window(local->rmem_handle); if (i != 0) - DEBUG(0, "ReleaseWindow(local->rmem) ret = %x\n", i); + dev_dbg(&link->dev, "ReleaseWindow(local->rmem) ret = %x\n", i); pcmcia_disable_device(link); - DEBUG(2, "ray_release ending\n"); + dev_dbg(&link->dev, "ray_release ending\n"); } static int ray_suspend(struct pcmcia_device *link) @@ -871,9 +862,9 @@ static int ray_dev_init(struct net_device *dev) ray_dev_t *local = netdev_priv(dev); struct pcmcia_device *link = local->finder; - DEBUG(1, "ray_dev_init(dev=%p)\n", dev); + dev_dbg(&link->dev, "ray_dev_init(dev=%p)\n", dev); if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_dev_init - device not present\n"); + dev_dbg(&link->dev, "ray_dev_init - device not present\n"); return -1; } #ifdef RAY_IMMEDIATE_INIT @@ -887,7 +878,7 @@ static int ray_dev_init(struct net_device *dev) /* Postpone the card init so that we can still configure the card, * for example using the Wireless Extensions. The init will happen * in ray_open() - Jean II */ - DEBUG(1, + dev_dbg(&link->dev, "ray_dev_init: postponing card init to ray_open() ; Status = %d\n", local->card_status); #endif /* RAY_IMMEDIATE_INIT */ @@ -896,7 +887,7 @@ static int ray_dev_init(struct net_device *dev) memcpy(dev->dev_addr, &local->sparm.b4.a_mac_addr, ADDRLEN); memset(dev->broadcast, 0xff, ETH_ALEN); - DEBUG(2, "ray_dev_init ending\n"); + dev_dbg(&link->dev, "ray_dev_init ending\n"); return 0; } @@ -906,9 +897,9 @@ static int ray_dev_config(struct net_device *dev, struct ifmap *map) ray_dev_t *local = netdev_priv(dev); struct pcmcia_device *link = local->finder; /* Dummy routine to satisfy device structure */ - DEBUG(1, "ray_dev_config(dev=%p,ifmap=%p)\n", dev, map); + dev_dbg(&link->dev, "ray_dev_config(dev=%p,ifmap=%p)\n", dev, map); if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_dev_config - device not present\n"); + dev_dbg(&link->dev, "ray_dev_config - device not present\n"); return -1; } @@ -924,14 +915,14 @@ static netdev_tx_t ray_dev_start_xmit(struct sk_buff *skb, short length = skb->len; if (!pcmcia_dev_present(link)) { - DEBUG(2, "ray_dev_start_xmit - device not present\n"); + dev_dbg(&link->dev, "ray_dev_start_xmit - device not present\n"); dev_kfree_skb(skb); return NETDEV_TX_OK; } - DEBUG(3, "ray_dev_start_xmit(skb=%p, dev=%p)\n", skb, dev); + dev_dbg(&link->dev, "ray_dev_start_xmit(skb=%p, dev=%p)\n", skb, dev); if (local->authentication_state == NEED_TO_AUTH) { - DEBUG(0, "ray_cs Sending authentication request.\n"); + dev_dbg(&link->dev, "ray_cs Sending authentication request.\n"); if (!build_auth_frame(local, local->auth_id, OPEN_AUTH_REQUEST)) { local->authentication_state = AUTHENTICATED; netif_stop_queue(dev); @@ -971,7 +962,7 @@ static int ray_hw_xmit(unsigned char *data, int len, struct net_device *dev, struct tx_msg __iomem *ptx; /* Address of xmit buffer in PC space */ short int addr; /* Address of xmit buffer in card space */ - DEBUG(3, "ray_hw_xmit(data=%p, len=%d, dev=%p)\n", data, len, dev); + pr_debug("ray_hw_xmit(data=%p, len=%d, dev=%p)\n", data, len, dev); if (len + TX_HEADER_LENGTH > TX_BUF_SIZE) { printk(KERN_INFO "ray_hw_xmit packet too large: %d bytes\n", len); @@ -979,9 +970,9 @@ static int ray_hw_xmit(unsigned char *data, int len, struct net_device *dev, } switch (ccsindex = get_free_tx_ccs(local)) { case ECCSBUSY: - DEBUG(2, "ray_hw_xmit tx_ccs table busy\n"); + pr_debug("ray_hw_xmit tx_ccs table busy\n"); case ECCSFULL: - DEBUG(2, "ray_hw_xmit No free tx ccs\n"); + pr_debug("ray_hw_xmit No free tx ccs\n"); case ECARDGONE: netif_stop_queue(dev); return XMIT_NO_CCS; @@ -1018,12 +1009,12 @@ static int ray_hw_xmit(unsigned char *data, int len, struct net_device *dev, writeb(PSM_CAM, &pccs->var.tx_request.pow_sav_mode); writeb(local->net_default_tx_rate, &pccs->var.tx_request.tx_rate); writeb(0, &pccs->var.tx_request.antenna); - DEBUG(3, "ray_hw_xmit default_tx_rate = 0x%x\n", + pr_debug("ray_hw_xmit default_tx_rate = 0x%x\n", local->net_default_tx_rate); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(2, "ray_hw_xmit failed - ECF not ready for intr\n"); + pr_debug("ray_hw_xmit failed - ECF not ready for intr\n"); /* TBD very inefficient to copy packet to buffer, and then not send it, but the alternative is to queue the messages and that won't be done for a while. Maybe set tbusy until a CCS is free? @@ -1040,7 +1031,7 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx, { __be16 proto = ((struct ethhdr *)data)->h_proto; if (ntohs(proto) >= 1536) { /* DIX II ethernet frame */ - DEBUG(3, "ray_cs translate_frame DIX II\n"); + pr_debug("ray_cs translate_frame DIX II\n"); /* Copy LLC header to card buffer */ memcpy_toio(&ptx->var, eth2_llc, sizeof(eth2_llc)); memcpy_toio(((void __iomem *)&ptx->var) + sizeof(eth2_llc), @@ -1056,9 +1047,9 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx, len - ETH_HLEN); return (int)sizeof(struct snaphdr_t) - ETH_HLEN; } else { /* already 802 type, and proto is length */ - DEBUG(3, "ray_cs translate_frame 802\n"); + pr_debug("ray_cs translate_frame 802\n"); if (proto == htons(0xffff)) { /* evil netware IPX 802.3 without LLC */ - DEBUG(3, "ray_cs translate_frame evil IPX\n"); + pr_debug("ray_cs translate_frame evil IPX\n"); memcpy_toio(&ptx->var, data + ETH_HLEN, len - ETH_HLEN); return 0 - ETH_HLEN; } @@ -1603,7 +1594,7 @@ static int ray_open(struct net_device *dev) struct pcmcia_device *link; link = local->finder; - DEBUG(1, "ray_open('%s')\n", dev->name); + dev_dbg(&link->dev, "ray_open('%s')\n", dev->name); if (link->open == 0) local->num_multi = 0; @@ -1613,7 +1604,7 @@ static int ray_open(struct net_device *dev) if (local->card_status == CARD_AWAITING_PARAM) { int i; - DEBUG(1, "ray_open: doing init now !\n"); + dev_dbg(&link->dev, "ray_open: doing init now !\n"); /* Download startup parameters */ if ((i = dl_startup_params(dev)) < 0) { @@ -1629,7 +1620,7 @@ static int ray_open(struct net_device *dev) else netif_start_queue(dev); - DEBUG(2, "ray_open ending\n"); + dev_dbg(&link->dev, "ray_open ending\n"); return 0; } /* end ray_open */ @@ -1640,7 +1631,7 @@ static int ray_dev_close(struct net_device *dev) struct pcmcia_device *link; link = local->finder; - DEBUG(1, "ray_dev_close('%s')\n", dev->name); + dev_dbg(&link->dev, "ray_dev_close('%s')\n", dev->name); link->open--; netif_stop_queue(dev); @@ -1656,7 +1647,7 @@ static int ray_dev_close(struct net_device *dev) /*===========================================================================*/ static void ray_reset(struct net_device *dev) { - DEBUG(1, "ray_reset entered\n"); + pr_debug("ray_reset entered\n"); return; } @@ -1669,17 +1660,17 @@ static int interrupt_ecf(ray_dev_t *local, int ccs) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs interrupt_ecf - device not present\n"); + dev_dbg(&link->dev, "ray_cs interrupt_ecf - device not present\n"); return -1; } - DEBUG(2, "interrupt_ecf(local=%p, ccs = 0x%x\n", local, ccs); + dev_dbg(&link->dev, "interrupt_ecf(local=%p, ccs = 0x%x\n", local, ccs); while (i && (readb(local->amem + CIS_OFFSET + ECF_INTR_OFFSET) & ECF_INTR_SET)) i--; if (i == 0) { - DEBUG(2, "ray_cs interrupt_ecf card not ready for interrupt\n"); + dev_dbg(&link->dev, "ray_cs interrupt_ecf card not ready for interrupt\n"); return -1; } /* Fill the mailbox, then kick the card */ @@ -1698,12 +1689,12 @@ static int get_free_tx_ccs(ray_dev_t *local) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs get_free_tx_ccs - device not present\n"); + dev_dbg(&link->dev, "ray_cs get_free_tx_ccs - device not present\n"); return ECARDGONE; } if (test_and_set_bit(0, &local->tx_ccs_lock)) { - DEBUG(1, "ray_cs tx_ccs_lock busy\n"); + dev_dbg(&link->dev, "ray_cs tx_ccs_lock busy\n"); return ECCSBUSY; } @@ -1716,7 +1707,7 @@ static int get_free_tx_ccs(ray_dev_t *local) } } local->tx_ccs_lock = 0; - DEBUG(2, "ray_cs ERROR no free tx CCS for raylink card\n"); + dev_dbg(&link->dev, "ray_cs ERROR no free tx CCS for raylink card\n"); return ECCSFULL; } /* get_free_tx_ccs */ @@ -1730,11 +1721,11 @@ static int get_free_ccs(ray_dev_t *local) struct pcmcia_device *link = local->finder; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs get_free_ccs - device not present\n"); + dev_dbg(&link->dev, "ray_cs get_free_ccs - device not present\n"); return ECARDGONE; } if (test_and_set_bit(0, &local->ccs_lock)) { - DEBUG(1, "ray_cs ccs_lock busy\n"); + dev_dbg(&link->dev, "ray_cs ccs_lock busy\n"); return ECCSBUSY; } @@ -1747,7 +1738,7 @@ static int get_free_ccs(ray_dev_t *local) } } local->ccs_lock = 0; - DEBUG(1, "ray_cs ERROR no free CCS for raylink card\n"); + dev_dbg(&link->dev, "ray_cs ERROR no free CCS for raylink card\n"); return ECCSFULL; } /* get_free_ccs */ @@ -1823,7 +1814,7 @@ static struct net_device_stats *ray_get_stats(struct net_device *dev) struct pcmcia_device *link = local->finder; struct status __iomem *p = local->sram + STATUS_BASE; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs net_device_stats - device not present\n"); + dev_dbg(&link->dev, "ray_cs net_device_stats - device not present\n"); return &local->stats; } if (readb(&p->mrx_overflow_for_host)) { @@ -1856,12 +1847,12 @@ static void ray_update_parm(struct net_device *dev, UCHAR objid, UCHAR *value, struct ccs __iomem *pccs; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_update_parm - device not present\n"); + dev_dbg(&link->dev, "ray_update_parm - device not present\n"); return; } if ((ccsindex = get_free_ccs(local)) < 0) { - DEBUG(0, "ray_update_parm - No free ccs\n"); + dev_dbg(&link->dev, "ray_update_parm - No free ccs\n"); return; } pccs = ccs_base(local) + ccsindex; @@ -1874,7 +1865,7 @@ static void ray_update_parm(struct net_device *dev, UCHAR objid, UCHAR *value, } /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(0, "ray_cs associate failed - ECF not ready for intr\n"); + dev_dbg(&link->dev, "ray_cs associate failed - ECF not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); } } @@ -1891,12 +1882,12 @@ static void ray_update_multi_list(struct net_device *dev, int all) void __iomem *p = local->sram + HOST_TO_ECF_BASE; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_update_multi_list - device not present\n"); + dev_dbg(&link->dev, "ray_update_multi_list - device not present\n"); return; } else - DEBUG(2, "ray_update_multi_list(%p)\n", dev); + dev_dbg(&link->dev, "ray_update_multi_list(%p)\n", dev); if ((ccsindex = get_free_ccs(local)) < 0) { - DEBUG(1, "ray_update_multi - No free ccs\n"); + dev_dbg(&link->dev, "ray_update_multi - No free ccs\n"); return; } pccs = ccs_base(local) + ccsindex; @@ -1910,7 +1901,7 @@ static void ray_update_multi_list(struct net_device *dev, int all) for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { memcpy_toio(p, dmi->dmi_addr, ETH_ALEN); - DEBUG(1, + dev_dbg(&link->dev, "ray_update_multi add addr %02x%02x%02x%02x%02x%02x\n", dmi->dmi_addr[0], dmi->dmi_addr[1], dmi->dmi_addr[2], dmi->dmi_addr[3], @@ -1921,12 +1912,12 @@ static void ray_update_multi_list(struct net_device *dev, int all) if (i > 256 / ADDRLEN) i = 256 / ADDRLEN; writeb((UCHAR) i, &pccs->var); - DEBUG(1, "ray_cs update_multi %d addresses in list\n", i); + dev_dbg(&link->dev, "ray_cs update_multi %d addresses in list\n", i); /* Interrupt the firmware to process the command */ local->num_multi = i; } if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, + dev_dbg(&link->dev, "ray_cs update_multi failed - ECF not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); } @@ -1938,11 +1929,11 @@ static void set_multicast_list(struct net_device *dev) ray_dev_t *local = netdev_priv(dev); UCHAR promisc; - DEBUG(2, "ray_cs set_multicast_list(%p)\n", dev); + pr_debug("ray_cs set_multicast_list(%p)\n", dev); if (dev->flags & IFF_PROMISC) { if (local->sparm.b5.a_promiscuous_mode == 0) { - DEBUG(1, "ray_cs set_multicast_list promisc on\n"); + pr_debug("ray_cs set_multicast_list promisc on\n"); local->sparm.b5.a_promiscuous_mode = 1; promisc = 1; ray_update_parm(dev, OBJID_promiscuous_mode, @@ -1950,7 +1941,7 @@ static void set_multicast_list(struct net_device *dev) } } else { if (local->sparm.b5.a_promiscuous_mode == 1) { - DEBUG(1, "ray_cs set_multicast_list promisc off\n"); + pr_debug("ray_cs set_multicast_list promisc off\n"); local->sparm.b5.a_promiscuous_mode = 0; promisc = 0; ray_update_parm(dev, OBJID_promiscuous_mode, @@ -1984,19 +1975,19 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) if (dev == NULL) /* Note that we want interrupts with dev->start == 0 */ return IRQ_NONE; - DEBUG(4, "ray_cs: interrupt for *dev=%p\n", dev); + pr_debug("ray_cs: interrupt for *dev=%p\n", dev); local = netdev_priv(dev); link = (struct pcmcia_device *)local->finder; if (!pcmcia_dev_present(link)) { - DEBUG(2, - "ray_cs interrupt from device not present or suspended.\n"); + pr_debug( + "ray_cs interrupt from device not present or suspended.\n"); return IRQ_NONE; } rcsindex = readb(&((struct scb __iomem *)(local->sram))->rcs_index); if (rcsindex >= (NUMBER_OF_CCS + NUMBER_OF_RCS)) { - DEBUG(1, "ray_cs interrupt bad rcsindex = 0x%x\n", rcsindex); + dev_dbg(&link->dev, "ray_cs interrupt bad rcsindex = 0x%x\n", rcsindex); clear_interrupt(local); return IRQ_HANDLED; } @@ -2008,33 +1999,33 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) case CCS_DOWNLOAD_STARTUP_PARAMS: /* Happens in firmware someday */ del_timer(&local->timer); if (status == CCS_COMMAND_COMPLETE) { - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt download_startup_parameters OK\n"); } else { - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt download_startup_parameters fail\n"); } break; case CCS_UPDATE_PARAMS: - DEBUG(1, "ray_cs interrupt update params done\n"); + dev_dbg(&link->dev, "ray_cs interrupt update params done\n"); if (status != CCS_COMMAND_COMPLETE) { tmp = readb(&pccs->var.update_param. failure_cause); - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt update params failed - reason %d\n", tmp); } break; case CCS_REPORT_PARAMS: - DEBUG(1, "ray_cs interrupt report params done\n"); + dev_dbg(&link->dev, "ray_cs interrupt report params done\n"); break; case CCS_UPDATE_MULTICAST_LIST: /* Note that this CCS isn't returned */ - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt CCS Update Multicast List done\n"); break; case CCS_UPDATE_POWER_SAVINGS_MODE: - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt update power save mode done\n"); break; case CCS_START_NETWORK: @@ -2043,11 +2034,11 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) if (readb (&pccs->var.start_network.net_initiated) == 1) { - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" started\n", local->sparm.b4.a_current_ess_id); } else { - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" joined\n", local->sparm.b4.a_current_ess_id); } @@ -2075,12 +2066,12 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) local->timer.expires = jiffies + HZ * 5; local->timer.data = (long)local; if (status == CCS_START_NETWORK) { - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" start failed\n", local->sparm.b4.a_current_ess_id); local->timer.function = &start_net; } else { - DEBUG(0, + dev_dbg(&link->dev, "ray_cs interrupt network \"%s\" join failed\n", local->sparm.b4.a_current_ess_id); local->timer.function = &join_net; @@ -2091,19 +2082,19 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) case CCS_START_ASSOCIATION: if (status == CCS_COMMAND_COMPLETE) { local->card_status = CARD_ASSOC_COMPLETE; - DEBUG(0, "ray_cs association successful\n"); + dev_dbg(&link->dev, "ray_cs association successful\n"); } else { - DEBUG(0, "ray_cs association failed,\n"); + dev_dbg(&link->dev, "ray_cs association failed,\n"); local->card_status = CARD_ASSOC_FAILED; join_net((u_long) local); } break; case CCS_TX_REQUEST: if (status == CCS_COMMAND_COMPLETE) { - DEBUG(3, + dev_dbg(&link->dev, "ray_cs interrupt tx request complete\n"); } else { - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt tx request failed\n"); } if (!sniffer) @@ -2111,21 +2102,21 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) netif_wake_queue(dev); break; case CCS_TEST_MEMORY: - DEBUG(1, "ray_cs interrupt mem test done\n"); + dev_dbg(&link->dev, "ray_cs interrupt mem test done\n"); break; case CCS_SHUTDOWN: - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt Unexpected CCS returned - Shutdown\n"); break; case CCS_DUMP_MEMORY: - DEBUG(1, "ray_cs interrupt dump memory done\n"); + dev_dbg(&link->dev, "ray_cs interrupt dump memory done\n"); break; case CCS_START_TIMER: - DEBUG(2, + dev_dbg(&link->dev, "ray_cs interrupt DING - raylink timer expired\n"); break; default: - DEBUG(1, + dev_dbg(&link->dev, "ray_cs interrupt Unexpected CCS 0x%x returned 0x%x\n", rcsindex, cmd); } @@ -2139,7 +2130,7 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) ray_rx(dev, local, prcs); break; case REJOIN_NET_COMPLETE: - DEBUG(1, "ray_cs interrupt rejoin net complete\n"); + dev_dbg(&link->dev, "ray_cs interrupt rejoin net complete\n"); local->card_status = CARD_ACQ_COMPLETE; /* do we need to clear tx buffers CCS's? */ if (local->sparm.b4.a_network_type == ADHOC) { @@ -2149,7 +2140,7 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) memcpy_fromio(&local->bss_id, prcs->var.rejoin_net_complete. bssid, ADDRLEN); - DEBUG(1, + dev_dbg(&link->dev, "ray_cs new BSSID = %02x%02x%02x%02x%02x%02x\n", local->bss_id[0], local->bss_id[1], local->bss_id[2], local->bss_id[3], @@ -2159,15 +2150,15 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id) } break; case ROAMING_INITIATED: - DEBUG(1, "ray_cs interrupt roaming initiated\n"); + dev_dbg(&link->dev, "ray_cs interrupt roaming initiated\n"); netif_stop_queue(dev); local->card_status = CARD_DOING_ACQ; break; case JAPAN_CALL_SIGN_RXD: - DEBUG(1, "ray_cs interrupt japan call sign rx\n"); + dev_dbg(&link->dev, "ray_cs interrupt japan call sign rx\n"); break; default: - DEBUG(1, + dev_dbg(&link->dev, "ray_cs Unexpected interrupt for RCS 0x%x cmd = 0x%x\n", rcsindex, (unsigned int)readb(&prcs->interrupt_id)); @@ -2186,7 +2177,7 @@ static void ray_rx(struct net_device *dev, ray_dev_t *local, int rx_len; unsigned int pkt_addr; void __iomem *pmsg; - DEBUG(4, "ray_rx process rx packet\n"); + pr_debug("ray_rx process rx packet\n"); /* Calculate address of packet within Rx buffer */ pkt_addr = ((readb(&prcs->var.rx_packet.rx_data_ptr[0]) << 8) @@ -2199,28 +2190,28 @@ static void ray_rx(struct net_device *dev, ray_dev_t *local, pmsg = local->rmem + pkt_addr; switch (readb(pmsg)) { case DATA_TYPE: - DEBUG(4, "ray_rx data type\n"); + pr_debug("ray_rx data type\n"); rx_data(dev, prcs, pkt_addr, rx_len); break; case AUTHENTIC_TYPE: - DEBUG(4, "ray_rx authentic type\n"); + pr_debug("ray_rx authentic type\n"); if (sniffer) rx_data(dev, prcs, pkt_addr, rx_len); else rx_authenticate(local, prcs, pkt_addr, rx_len); break; case DEAUTHENTIC_TYPE: - DEBUG(4, "ray_rx deauth type\n"); + pr_debug("ray_rx deauth type\n"); if (sniffer) rx_data(dev, prcs, pkt_addr, rx_len); else rx_deauthenticate(local, prcs, pkt_addr, rx_len); break; case NULL_MSG_TYPE: - DEBUG(3, "ray_cs rx NULL msg\n"); + pr_debug("ray_cs rx NULL msg\n"); break; case BEACON_TYPE: - DEBUG(4, "ray_rx beacon type\n"); + pr_debug("ray_rx beacon type\n"); if (sniffer) rx_data(dev, prcs, pkt_addr, rx_len); @@ -2233,7 +2224,7 @@ static void ray_rx(struct net_device *dev, ray_dev_t *local, ray_get_stats(dev); break; default: - DEBUG(0, "ray_cs unknown pkt type %2x\n", + pr_debug("ray_cs unknown pkt type %2x\n", (unsigned int)readb(pmsg)); break; } @@ -2262,7 +2253,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, rx_len > (dev->mtu + RX_MAC_HEADER_LENGTH + ETH_HLEN + FCS_LEN)) { - DEBUG(0, + pr_debug( "ray_cs invalid packet length %d received \n", rx_len); return; @@ -2273,17 +2264,17 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, rx_len > (dev->mtu + RX_MAC_HEADER_LENGTH + ETH_HLEN + FCS_LEN)) { - DEBUG(0, + pr_debug( "ray_cs invalid packet length %d received \n", rx_len); return; } } } - DEBUG(4, "ray_cs rx_data packet\n"); + pr_debug("ray_cs rx_data packet\n"); /* If fragmented packet, verify sizes of fragments add up */ if (readb(&prcs->var.rx_packet.next_frag_rcs_index) != 0xFF) { - DEBUG(1, "ray_cs rx'ed fragment\n"); + pr_debug("ray_cs rx'ed fragment\n"); tmp = (readb(&prcs->var.rx_packet.totalpacketlength[0]) << 8) + readb(&prcs->var.rx_packet.totalpacketlength[1]); total_len = tmp; @@ -2301,7 +2292,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, } while (1); if (tmp < 0) { - DEBUG(0, + pr_debug( "ray_cs rx_data fragment lengths don't add up\n"); local->stats.rx_dropped++; release_frag_chain(local, prcs); @@ -2313,7 +2304,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, skb = dev_alloc_skb(total_len + 5); if (skb == NULL) { - DEBUG(0, "ray_cs rx_data could not allocate skb\n"); + pr_debug("ray_cs rx_data could not allocate skb\n"); local->stats.rx_dropped++; if (readb(&prcs->var.rx_packet.next_frag_rcs_index) != 0xFF) release_frag_chain(local, prcs); @@ -2321,7 +2312,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, } skb_reserve(skb, 2); /* Align IP on 16 byte (TBD check this) */ - DEBUG(4, "ray_cs rx_data total_len = %x, rx_len = %x\n", total_len, + pr_debug("ray_cs rx_data total_len = %x, rx_len = %x\n", total_len, rx_len); /************************/ @@ -2354,7 +2345,7 @@ static void rx_data(struct net_device *dev, struct rcs __iomem *prcs, tmp = 17; if (readb(&prcs->var.rx_packet.next_frag_rcs_index) != 0xFF) { prcslink = prcs; - DEBUG(1, "ray_cs rx_data in fragment loop\n"); + pr_debug("ray_cs rx_data in fragment loop\n"); do { prcslink = rcs_base(local) + @@ -2426,8 +2417,8 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) memcpy(destaddr, ieee80211_get_DA(pmac), ADDRLEN); memcpy(srcaddr, ieee80211_get_SA(pmac), ADDRLEN); -#ifdef PCMCIA_DEBUG - if (pc_debug > 3) { +#if 0 + if { print_hex_dump(KERN_DEBUG, "skb->data before untranslate: ", DUMP_PREFIX_NONE, 16, 1, skb->data, 64, true); @@ -2441,7 +2432,7 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) if (psnap->dsap != 0xaa || psnap->ssap != 0xaa || psnap->ctrl != 3) { /* not a snap type so leave it alone */ - DEBUG(3, "ray_cs untranslate NOT SNAP %02x %02x %02x\n", + pr_debug("ray_cs untranslate NOT SNAP %02x %02x %02x\n", psnap->dsap, psnap->ssap, psnap->ctrl); delta = RX_MAC_HEADER_LENGTH - ETH_HLEN; @@ -2450,7 +2441,7 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) } else { /* Its a SNAP */ if (memcmp(psnap->org, org_bridge, 3) == 0) { /* EtherII and nuke the LLC */ - DEBUG(3, "ray_cs untranslate Bridge encap\n"); + pr_debug("ray_cs untranslate Bridge encap\n"); delta = RX_MAC_HEADER_LENGTH + sizeof(struct snaphdr_t) - ETH_HLEN; peth = (struct ethhdr *)(skb->data + delta); @@ -2459,14 +2450,14 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) switch (ntohs(type)) { case ETH_P_IPX: case ETH_P_AARP: - DEBUG(3, "ray_cs untranslate RFC IPX/AARP\n"); + pr_debug("ray_cs untranslate RFC IPX/AARP\n"); delta = RX_MAC_HEADER_LENGTH - ETH_HLEN; peth = (struct ethhdr *)(skb->data + delta); peth->h_proto = htons(len - RX_MAC_HEADER_LENGTH); break; default: - DEBUG(3, "ray_cs untranslate RFC default\n"); + pr_debug("ray_cs untranslate RFC default\n"); delta = RX_MAC_HEADER_LENGTH + sizeof(struct snaphdr_t) - ETH_HLEN; peth = (struct ethhdr *)(skb->data + delta); @@ -2482,12 +2473,12 @@ static void untranslate(ray_dev_t *local, struct sk_buff *skb, int len) } /* TBD reserve skb_reserve(skb, delta); */ skb_pull(skb, delta); - DEBUG(3, "untranslate after skb_pull(%d), skb->data = %p\n", delta, + pr_debug("untranslate after skb_pull(%d), skb->data = %p\n", delta, skb->data); memcpy(peth->h_dest, destaddr, ADDRLEN); memcpy(peth->h_source, srcaddr, ADDRLEN); -#ifdef PCMCIA_DEBUG - if (pc_debug > 3) { +#if 0 + { int i; printk(KERN_DEBUG "skb->data after untranslate:"); for (i = 0; i < 64; i++) @@ -2529,7 +2520,7 @@ static void release_frag_chain(ray_dev_t *local, struct rcs __iomem *prcs) while (tmp--) { writeb(CCS_BUFFER_FREE, &prcslink->buffer_status); if (rcsindex >= (NUMBER_OF_CCS + NUMBER_OF_RCS)) { - DEBUG(1, "ray_cs interrupt bad rcsindex = 0x%x\n", + pr_debug("ray_cs interrupt bad rcsindex = 0x%x\n", rcsindex); break; } @@ -2543,9 +2534,9 @@ static void release_frag_chain(ray_dev_t *local, struct rcs __iomem *prcs) static void authenticate(ray_dev_t *local) { struct pcmcia_device *link = local->finder; - DEBUG(0, "ray_cs Starting authentication.\n"); + dev_dbg(&link->dev, "ray_cs Starting authentication.\n"); if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs authenticate - device not present\n"); + dev_dbg(&link->dev, "ray_cs authenticate - device not present\n"); return; } @@ -2573,11 +2564,11 @@ static void rx_authenticate(ray_dev_t *local, struct rcs __iomem *prcs, copy_from_rx_buff(local, buff, pkt_addr, rx_len & 0xff); /* if we are trying to get authenticated */ if (local->sparm.b4.a_network_type == ADHOC) { - DEBUG(1, "ray_cs rx_auth var= %02x %02x %02x %02x %02x %02x\n", + pr_debug("ray_cs rx_auth var= %02x %02x %02x %02x %02x %02x\n", msg->var[0], msg->var[1], msg->var[2], msg->var[3], msg->var[4], msg->var[5]); if (msg->var[2] == 1) { - DEBUG(0, "ray_cs Sending authentication response.\n"); + pr_debug("ray_cs Sending authentication response.\n"); if (!build_auth_frame (local, msg->mac.addr_2, OPEN_AUTH_RESPONSE)) { local->authentication_state = NEED_TO_AUTH; @@ -2591,13 +2582,13 @@ static void rx_authenticate(ray_dev_t *local, struct rcs __iomem *prcs, /* Verify authentication sequence #2 and success */ if (msg->var[2] == 2) { if ((msg->var[3] | msg->var[4]) == 0) { - DEBUG(1, "Authentication successful\n"); + pr_debug("Authentication successful\n"); local->card_status = CARD_AUTH_COMPLETE; associate(local); local->authentication_state = AUTHENTICATED; } else { - DEBUG(0, "Authentication refused\n"); + pr_debug("Authentication refused\n"); local->card_status = CARD_AUTH_REFUSED; join_net((u_long) local); local->authentication_state = @@ -2617,22 +2608,22 @@ static void associate(ray_dev_t *local) struct net_device *dev = link->priv; int ccsindex; if (!(pcmcia_dev_present(link))) { - DEBUG(2, "ray_cs associate - device not present\n"); + dev_dbg(&link->dev, "ray_cs associate - device not present\n"); return; } /* If no tx buffers available, return */ if ((ccsindex = get_free_ccs(local)) < 0) { /* TBD should never be here but... what if we are? */ - DEBUG(1, "ray_cs associate - No free ccs\n"); + dev_dbg(&link->dev, "ray_cs associate - No free ccs\n"); return; } - DEBUG(1, "ray_cs Starting association with access point\n"); + dev_dbg(&link->dev, "ray_cs Starting association with access point\n"); pccs = ccs_base(local) + ccsindex; /* fill in the CCS */ writeb(CCS_START_ASSOCIATION, &pccs->cmd); /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, "ray_cs associate failed - ECF not ready for intr\n"); + dev_dbg(&link->dev, "ray_cs associate failed - ECF not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); del_timer(&local->timer); @@ -2655,7 +2646,7 @@ static void rx_deauthenticate(ray_dev_t *local, struct rcs __iomem *prcs, /* UCHAR buff[256]; struct rx_msg *msg = (struct rx_msg *)buff; */ - DEBUG(0, "Deauthentication frame received\n"); + pr_debug("Deauthentication frame received\n"); local->authentication_state = UNAUTHENTICATED; /* Need to reauthenticate or rejoin depending on reason code */ /* copy_from_rx_buff(local, buff, pkt_addr, rx_len & 0xff); @@ -2823,7 +2814,7 @@ static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type) /* If no tx buffers available, return */ if ((ccsindex = get_free_tx_ccs(local)) < 0) { - DEBUG(1, "ray_cs send authenticate - No free tx ccs\n"); + pr_debug("ray_cs send authenticate - No free tx ccs\n"); return -1; } @@ -2855,7 +2846,7 @@ static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type) /* Interrupt the firmware to process the command */ if (interrupt_ecf(local, ccsindex)) { - DEBUG(1, + pr_debug( "ray_cs send authentication request failed - ECF not ready for intr\n"); writeb(CCS_BUFFER_FREE, &(pccs++)->buffer_status); return -1; @@ -2942,9 +2933,9 @@ static int __init init_ray_cs(void) { int rc; - DEBUG(1, "%s\n", rcsid); + pr_debug("%s\n", rcsid); rc = pcmcia_register_driver(&ray_driver); - DEBUG(1, "raylink init_module register_pcmcia_driver returns 0x%x\n", + pr_debug("raylink init_module register_pcmcia_driver returns 0x%x\n", rc); #ifdef CONFIG_PROC_FS @@ -2964,7 +2955,7 @@ static int __init init_ray_cs(void) static void __exit exit_ray_cs(void) { - DEBUG(0, "ray_cs: cleanup_module\n"); + pr_debug("ray_cs: cleanup_module\n"); #ifdef CONFIG_PROC_FS remove_proc_entry("driver/ray_cs/ray_cs", NULL); -- cgit v0.10.2 From 2caff14713d53abba273e6095495788e2720f756 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:53:36 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (wireless) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: linux-wireless@vger.kernel.org CC: netdev@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index d0593ed..a1b84fc 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -43,21 +43,6 @@ #include "airo.h" -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -static char *version = "$Revision: 1.2 $"; -#define DEBUG(n, args...) if (pc_debug > (n)) printk(KERN_DEBUG args); -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -145,7 +130,7 @@ static int airo_probe(struct pcmcia_device *p_dev) { local_info_t *local; - DEBUG(0, "airo_attach()\n"); + dev_dbg(&p_dev->dev, "airo_attach()\n"); /* Interrupt setup */ p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; @@ -184,7 +169,7 @@ static int airo_probe(struct pcmcia_device *p_dev) static void airo_detach(struct pcmcia_device *link) { - DEBUG(0, "airo_detach(0x%p)\n", link); + dev_dbg(&link->dev, "airo_detach\n"); airo_release(link); @@ -204,9 +189,6 @@ static void airo_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int airo_cs_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -291,11 +273,11 @@ static int airo_config(struct pcmcia_device *link) { local_info_t *dev; win_req_t *req; - int last_fn, last_ret; + int ret; dev = link->priv; - DEBUG(0, "airo_config(0x%p)\n", link); + dev_dbg(&link->dev, "airo_config\n"); req = kzalloc(sizeof(win_req_t), GFP_KERNEL); if (!req) @@ -315,8 +297,8 @@ static int airo_config(struct pcmcia_device *link) * and most client drivers will only use the CIS to fill in * implementation-defined details. */ - last_ret = pcmcia_loop_config(link, airo_cs_config_check, req); - if (last_ret) + ret = pcmcia_loop_config(link, airo_cs_config_check, req); + if (ret) goto failed; /* @@ -324,21 +306,25 @@ static int airo_config(struct pcmcia_device *link) handler to the interrupt, unless the 'Handler' member of the irq structure is initialized. */ - if (link->conf.Attributes & CONF_ENABLE_IRQ) - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + if (link->conf.Attributes & CONF_ENABLE_IRQ) { + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + } /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; ((local_info_t *)link->priv)->eth_dev = init_airo_card(link->irq.AssignedIRQ, link->io.BasePort1, 1, &handle_to_dev(link)); if (!((local_info_t *)link->priv)->eth_dev) - goto cs_failed; + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -368,8 +354,6 @@ static int airo_config(struct pcmcia_device *link) kfree(req); return 0; - cs_failed: - cs_error(link, last_fn, last_ret); failed: airo_release(link); kfree(req); @@ -386,7 +370,7 @@ static int airo_config(struct pcmcia_device *link) static void airo_release(struct pcmcia_device *link) { - DEBUG(0, "airo_release(0x%p)\n", link); + dev_dbg(&link->dev, "airo_release\n"); pcmcia_disable_device(link); } diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c index ddaa859..7838572 100644 --- a/drivers/net/wireless/atmel_cs.c +++ b/drivers/net/wireless/atmel_cs.c @@ -55,22 +55,6 @@ #include "atmel.h" -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -static char *version = "$Revision: 1.2 $"; -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args); -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -155,7 +139,7 @@ static int atmel_probe(struct pcmcia_device *p_dev) { local_info_t *local; - DEBUG(0, "atmel_attach()\n"); + dev_dbg(&p_dev->dev, "atmel_attach()\n"); /* Interrupt setup */ p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; @@ -194,7 +178,7 @@ static int atmel_probe(struct pcmcia_device *p_dev) static void atmel_detach(struct pcmcia_device *link) { - DEBUG(0, "atmel_detach(0x%p)\n", link); + dev_dbg(&link->dev, "atmel_detach\n"); atmel_release(link); @@ -209,9 +193,6 @@ static void atmel_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - /* Call-back function to interrogate PCMCIA-specific information about the current existance of the card */ static int card_present(void *arg) @@ -275,13 +256,13 @@ static int atmel_config_check(struct pcmcia_device *p_dev, static int atmel_config(struct pcmcia_device *link) { local_info_t *dev; - int last_fn, last_ret; + int ret; struct pcmcia_device_id *did; dev = link->priv; did = dev_get_drvdata(&handle_to_dev(link)); - DEBUG(0, "atmel_config(0x%p)\n", link); + dev_dbg(&link->dev, "atmel_config\n"); /* In this loop, we scan the CIS for configuration table entries, @@ -303,20 +284,25 @@ static int atmel_config(struct pcmcia_device *link) handler to the interrupt, unless the 'Handler' member of the irq structure is initialized. */ - if (link->conf.Attributes & CONF_ENABLE_IRQ) - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + if (link->conf.Attributes & CONF_ENABLE_IRQ) { + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + } /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; if (link->irq.AssignedIRQ == 0) { printk(KERN_ALERT "atmel: cannot assign IRQ: check that CONFIG_ISA is set in kernel config."); - goto cs_failed; + goto failed; } ((local_info_t*)link->priv)->eth_dev = @@ -327,7 +313,7 @@ static int atmel_config(struct pcmcia_device *link) card_present, link); if (!((local_info_t*)link->priv)->eth_dev) - goto cs_failed; + goto failed; /* @@ -340,8 +326,6 @@ static int atmel_config(struct pcmcia_device *link) return 0; - cs_failed: - cs_error(link, last_fn, last_ret); failed: atmel_release(link); return -ENODEV; @@ -359,7 +343,7 @@ static void atmel_release(struct pcmcia_device *link) { struct net_device *dev = ((local_info_t*)link->priv)->eth_dev; - DEBUG(0, "atmel_release(0x%p)\n", link); + dev_dbg(&link->dev, "atmel_release\n"); if (dev) stop_atmel_card(dev); diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index 31b60dd..ca3ab84 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -510,10 +510,6 @@ static void prism2_detach(struct pcmcia_device *link) } -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - - /* run after a CARD_INSERTION event is received to configure the PCMCIA * socket and make the device available to the system */ @@ -605,7 +601,6 @@ static int prism2_config(struct pcmcia_device *link) struct hostap_interface *iface; local_info_t *local; int ret = 1; - int last_fn, last_ret; struct hostap_cs_priv *hw_priv; PDEBUG(DEBUG_FLOW, "prism2_config()\n"); @@ -617,13 +612,12 @@ static int prism2_config(struct pcmcia_device *link) } /* Look for an appropriate configuration table entry in the CIS */ - last_ret = pcmcia_loop_config(link, prism2_config_check, NULL); - if (last_ret) { + ret = pcmcia_loop_config(link, prism2_config_check, NULL); + if (ret) { if (!ignore_cis_vcc) printk(KERN_ERR "GetNextTuple(): No matching " "CIS configuration. Maybe you need the " "ignore_cis_vcc=1 parameter.\n"); - cs_error(link, RequestIO, last_ret); goto failed; } @@ -652,8 +646,9 @@ static int prism2_config(struct pcmcia_device *link) link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = prism2_interrupt; link->irq.Instance = dev; - CS_CHECK(RequestIRQ, - pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -661,8 +656,9 @@ static int prism2_config(struct pcmcia_device *link) * the I/O windows and the interrupt mapping, and putting the * card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -695,9 +691,6 @@ static int prism2_config(struct pcmcia_device *link) } return ret; - cs_failed: - cs_error(link, last_fn, last_ret); - failed: kfree(hw_priv); prism2_release((u_long)link); diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index 9498b46..cbd85de 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -145,23 +145,6 @@ static const unsigned int txConfEUD = 0x10; /* Enable Uni-Data packets */ static const unsigned int txConfKey = 0x02; /* Scramble data packets */ static const unsigned int txConfLoop = 0x01; /* Loopback mode */ -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"netwave_cs.c 0.3.0 Thu Jul 17 14:36:02 1997 (John Markus Bjørndalen)\n"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -383,7 +366,7 @@ static int netwave_probe(struct pcmcia_device *link) struct net_device *dev; netwave_private *priv; - DEBUG(0, "netwave_attach()\n"); + dev_dbg(&link->dev, "netwave_attach()\n"); /* Initialize the struct pcmcia_device structure */ dev = alloc_etherdev(sizeof(netwave_private)); @@ -438,7 +421,7 @@ static void netwave_detach(struct pcmcia_device *link) { struct net_device *dev = link->priv; - DEBUG(0, "netwave_detach(0x%p)\n", link); + dev_dbg(&link->dev, "netwave_detach\n"); netwave_release(link); @@ -725,18 +708,15 @@ static const struct iw_handler_def netwave_handler_def = * */ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int netwave_pcmcia_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; netwave_private *priv = netdev_priv(dev); - int i, j, last_ret, last_fn; + int i, j, ret; win_req_t req; memreq_t mem; u_char __iomem *ramBase = NULL; - DEBUG(0, "netwave_pcmcia_config(0x%p)\n", link); + dev_dbg(&link->dev, "netwave_pcmcia_config\n"); /* * Try allocating IO ports. This tries a few fixed addresses. @@ -749,22 +729,24 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { if (i == 0) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) goto failed; - } /* * Now allocate an interrupt line. Note that this does not * actually assign a handler to the interrupt. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; /* * This actually configures the PCMCIA socket -- setting up * the I/O windows and the interrupt mapping. */ - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* * Allocate a 32K memory window. Note that the struct pcmcia_device @@ -772,14 +754,18 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { * device needs several windows, you'll need to keep track of * the handles in your private data structure, dev->priv. */ - DEBUG(1, "Setting mem speed of %d\n", mem_speed); + dev_dbg(&link->dev, "Setting mem speed of %d\n", mem_speed); req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_CM|WIN_ENABLE; req.Base = 0; req.Size = 0x8000; req.AccessSpeed = mem_speed; - CS_CHECK(RequestWindow, pcmcia_request_window(&link, &req, &link->win)); + ret = pcmcia_request_window(&link, &req, &link->win); + if (ret) + goto failed; mem.CardOffset = 0x20000; mem.Page = 0; - CS_CHECK(MapMemPage, pcmcia_map_mem_page(link->win, &mem)); + ret = pcmcia_map_mem_page(link->win, &mem); + if (ret) + goto failed; /* Store base address of the common window frame */ ramBase = ioremap(req.Base, 0x8000); @@ -818,8 +804,6 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { get_uint16(ramBase + NETWAVE_EREG_ARW+2)); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: netwave_release(link); return -ENODEV; @@ -837,7 +821,7 @@ static void netwave_release(struct pcmcia_device *link) struct net_device *dev = link->priv; netwave_private *priv = netdev_priv(dev); - DEBUG(0, "netwave_release(0x%p)\n", link); + dev_dbg(&link->dev, "netwave_release\n"); pcmcia_disable_device(link); if (link->win) @@ -892,7 +876,7 @@ static void netwave_reset(struct net_device *dev) { u_char __iomem *ramBase = priv->ramBase; unsigned int iobase = dev->base_addr; - DEBUG(0, "netwave_reset: Done with hardware reset\n"); + pr_debug("netwave_reset: Done with hardware reset\n"); priv->timeoutCounter = 0; @@ -988,7 +972,7 @@ static int netwave_hw_xmit(unsigned char* data, int len, dev->stats.tx_bytes += len; - DEBUG(3, "Transmitting with SPCQ %x SPU %x LIF %x ISPLQ %x\n", + pr_debug("Transmitting with SPCQ %x SPU %x LIF %x ISPLQ %x\n", readb(ramBase + NETWAVE_EREG_SPCQ), readb(ramBase + NETWAVE_EREG_SPU), readb(ramBase + NETWAVE_EREG_LIF), @@ -1000,7 +984,7 @@ static int netwave_hw_xmit(unsigned char* data, int len, MaxData = get_uint16(ramBase + NETWAVE_EREG_TDP+2); DataOffset = get_uint16(ramBase + NETWAVE_EREG_TDP+4); - DEBUG(3, "TxFreeList %x, MaxData %x, DataOffset %x\n", + pr_debug("TxFreeList %x, MaxData %x, DataOffset %x\n", TxFreeList, MaxData, DataOffset); /* Copy packet to the adapter fragment buffers */ @@ -1088,7 +1072,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) status = inb(iobase + NETWAVE_REG_ASR); if (!pcmcia_dev_present(link)) { - DEBUG(1, "netwave_interrupt: Interrupt with status 0x%x " + pr_debug("netwave_interrupt: Interrupt with status 0x%x " "from removed or suspended card!\n", status); break; } @@ -1132,7 +1116,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) int txStatus; txStatus = readb(ramBase + NETWAVE_EREG_TSER); - DEBUG(3, "Transmit done. TSER = %x id %x\n", + pr_debug("Transmit done. TSER = %x id %x\n", txStatus, readb(ramBase + NETWAVE_EREG_TSER + 1)); if (txStatus & 0x20) { @@ -1156,7 +1140,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) * TxGU and TxNOAP is set. (Those are the only ones * to set TxErr). */ - DEBUG(3, "netwave_interrupt: TxDN with error status %x\n", + pr_debug("netwave_interrupt: TxDN with error status %x\n", txStatus); /* Clear out TxGU, TxNOAP, TxErr and TxTrys */ @@ -1164,7 +1148,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) writeb(0xdf & txStatus, ramBase+NETWAVE_EREG_TSER+4); ++dev->stats.tx_errors; } - DEBUG(3, "New status is TSER %x ASR %x\n", + pr_debug("New status is TSER %x ASR %x\n", readb(ramBase + NETWAVE_EREG_TSER), inb(iobase + NETWAVE_REG_ASR)); @@ -1172,7 +1156,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) } /* TxBA, this would trigger on all error packets received */ /* if (status & 0x01) { - DEBUG(4, "Transmit buffers available, %x\n", status); + pr_debug("Transmit buffers available, %x\n", status); } */ } @@ -1190,7 +1174,7 @@ static irqreturn_t netwave_interrupt(int irq, void* dev_id) */ static void netwave_watchdog(struct net_device *dev) { - DEBUG(1, "%s: netwave_watchdog: watchdog timer expired\n", dev->name); + pr_debug("%s: netwave_watchdog: watchdog timer expired\n", dev->name); netwave_reset(dev); dev->trans_start = jiffies; netif_wake_queue(dev); @@ -1211,7 +1195,7 @@ static int netwave_rx(struct net_device *dev) int i; u_char *ptr; - DEBUG(3, "xinw_rx: Receiving ... \n"); + pr_debug("xinw_rx: Receiving ... \n"); /* Receive max 10 packets for now. */ for (i = 0; i < 10; i++) { @@ -1237,7 +1221,7 @@ static int netwave_rx(struct net_device *dev) skb = dev_alloc_skb(rcvLen+5); if (skb == NULL) { - DEBUG(1, "netwave_rx: Could not allocate an sk_buff of " + pr_debug("netwave_rx: Could not allocate an sk_buff of " "length %d\n", rcvLen); ++dev->stats.rx_dropped; /* Tell the adapter to skip the packet */ @@ -1279,7 +1263,7 @@ static int netwave_rx(struct net_device *dev) wait_WOC(iobase); writeb(NETWAVE_CMD_SRP, ramBase + NETWAVE_EREG_CB + 0); writeb(NETWAVE_CMD_EOC, ramBase + NETWAVE_EREG_CB + 1); - DEBUG(3, "Packet reception ok\n"); + pr_debug("Packet reception ok\n"); } return 0; } @@ -1288,7 +1272,7 @@ static int netwave_open(struct net_device *dev) { netwave_private *priv = netdev_priv(dev); struct pcmcia_device *link = priv->p_dev; - DEBUG(1, "netwave_open: starting.\n"); + dev_dbg(&link->dev, "netwave_open: starting.\n"); if (!pcmcia_dev_present(link)) return -ENODEV; @@ -1305,7 +1289,7 @@ static int netwave_close(struct net_device *dev) { netwave_private *priv = netdev_priv(dev); struct pcmcia_device *link = priv->p_dev; - DEBUG(1, "netwave_close: finishing.\n"); + dev_dbg(&link->dev, "netwave_close: finishing.\n"); link->open--; netif_stop_queue(dev); @@ -1358,11 +1342,11 @@ static void set_multicast_list(struct net_device *dev) u_char rcvMode = 0; #ifdef PCMCIA_DEBUG - if (pc_debug > 2) { - static int old; + { + xstatic int old; if (old != dev->mc_count) { old = dev->mc_count; - DEBUG(0, "%s: setting Rx mode to %d addresses.\n", + pr_debug("%s: setting Rx mode to %d addresses.\n", dev->name, dev->mc_count); } } diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index 38c1c9d..da626ec 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -160,12 +160,6 @@ static void orinoco_cs_detach(struct pcmcia_device *link) * device available to the system. */ -#define CS_CHECK(fn, ret) do { \ - last_fn = (fn); \ - if ((last_ret = (ret)) != 0) \ - goto cs_failed; \ -} while (0) - static int orinoco_cs_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -240,7 +234,7 @@ orinoco_cs_config(struct pcmcia_device *link) struct orinoco_private *priv = link->priv; struct orinoco_pccard *card = priv->card; hermes_t *hw = &priv->hw; - int last_fn, last_ret; + int ret; void __iomem *mem; /* @@ -257,13 +251,12 @@ orinoco_cs_config(struct pcmcia_device *link) * and most client drivers will only use the CIS to fill in * implementation-defined details. */ - last_ret = pcmcia_loop_config(link, orinoco_cs_config_check, NULL); - if (last_ret) { + ret = pcmcia_loop_config(link, orinoco_cs_config_check, NULL); + if (ret) { if (!ignore_cis_vcc) printk(KERN_ERR PFX "GetNextTuple(): No matching " "CIS configuration. Maybe you need the " "ignore_cis_vcc=1 parameter.\n"); - cs_error(link, RequestIO, last_ret); goto failed; } @@ -272,14 +265,16 @@ orinoco_cs_config(struct pcmcia_device *link) * a handler to the interrupt, unless the 'Handler' member of * the irq structure is initialized. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; /* We initialize the hermes structure before completing PCMCIA * configuration just in case the interrupt handler gets * called. */ mem = ioport_map(link->io.BasePort1, link->io.NumPorts1); if (!mem) - goto cs_failed; + goto failed; hermes_struct_init(hw, mem, HERMES_16BIT_REGSPACING); @@ -288,8 +283,9 @@ orinoco_cs_config(struct pcmcia_device *link) * the I/O windows and the interrupt mapping, and putting the * card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* Ok, we have the configuration, prepare to register the netdev */ card->node.major = card->node.minor = 0; @@ -315,9 +311,6 @@ orinoco_cs_config(struct pcmcia_device *link) * net_device has been registered */ return 0; - cs_failed: - cs_error(link, last_fn, last_ret); - failed: orinoco_cs_release(link); return -ENODEV; diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c index c361310..700010e 100644 --- a/drivers/net/wireless/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/orinoco/spectrum_cs.c @@ -73,9 +73,6 @@ static void spectrum_cs_release(struct pcmcia_device *link); #define HCR_MEM16 0x10 /* memory width bit, should be preserved */ -#define CS_CHECK(fn, ret) \ - do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - /* * Reset the card using configuration registers COR and CCSR. * If IDLE is 1, stop the firmware, so that it can be safely rewritten. @@ -83,7 +80,7 @@ static void spectrum_cs_release(struct pcmcia_device *link); static int spectrum_reset(struct pcmcia_device *link, int idle) { - int last_ret, last_fn; + int ret; conf_reg_t reg; u_int save_cor; @@ -95,23 +92,26 @@ spectrum_reset(struct pcmcia_device *link, int idle) reg.Function = 0; reg.Action = CS_READ; reg.Offset = CISREG_COR; - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; save_cor = reg.Value; /* Soft-Reset card */ reg.Action = CS_WRITE; reg.Offset = CISREG_COR; reg.Value = (save_cor | COR_SOFT_RESET); - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; udelay(1000); /* Read CCSR */ reg.Action = CS_READ; reg.Offset = CISREG_CCSR; - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; /* * Start or stop the firmware. Memory width bit should be @@ -120,21 +120,22 @@ spectrum_reset(struct pcmcia_device *link, int idle) reg.Action = CS_WRITE; reg.Offset = CISREG_CCSR; reg.Value = (idle ? HCR_IDLE : HCR_RUN) | (reg.Value & HCR_MEM16); - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; udelay(1000); /* Restore original COR configuration index */ reg.Action = CS_WRITE; reg.Offset = CISREG_COR; reg.Value = (save_cor & ~COR_SOFT_RESET); - CS_CHECK(AccessConfigurationRegister, - pcmcia_access_configuration_register(link, ®)); + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; udelay(1000); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); +failed: return -ENODEV; } @@ -307,7 +308,7 @@ spectrum_cs_config(struct pcmcia_device *link) struct orinoco_private *priv = link->priv; struct orinoco_pccard *card = priv->card; hermes_t *hw = &priv->hw; - int last_fn, last_ret; + int ret; void __iomem *mem; /* @@ -324,13 +325,12 @@ spectrum_cs_config(struct pcmcia_device *link) * and most client drivers will only use the CIS to fill in * implementation-defined details. */ - last_ret = pcmcia_loop_config(link, spectrum_cs_config_check, NULL); - if (last_ret) { + ret = pcmcia_loop_config(link, spectrum_cs_config_check, NULL); + if (ret) { if (!ignore_cis_vcc) printk(KERN_ERR PFX "GetNextTuple(): No matching " "CIS configuration. Maybe you need the " "ignore_cis_vcc=1 parameter.\n"); - cs_error(link, RequestIO, last_ret); goto failed; } @@ -339,14 +339,16 @@ spectrum_cs_config(struct pcmcia_device *link) * a handler to the interrupt, unless the 'Handler' member of * the irq structure is initialized. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; /* We initialize the hermes structure before completing PCMCIA * configuration just in case the interrupt handler gets * called. */ mem = ioport_map(link->io.BasePort1, link->io.NumPorts1); if (!mem) - goto cs_failed; + goto failed; hermes_struct_init(hw, mem, HERMES_16BIT_REGSPACING); @@ -355,8 +357,9 @@ spectrum_cs_config(struct pcmcia_device *link) * the I/O windows and the interrupt mapping, and putting the * card and host interface into "Memory and IO" mode. */ - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* Ok, we have the configuration, prepare to register the netdev */ card->node.major = card->node.minor = 0; @@ -386,9 +389,6 @@ spectrum_cs_config(struct pcmcia_device *link) * net_device has been registered */ return 0; - cs_failed: - cs_error(link, last_fn, last_ret); - failed: spectrum_cs_release(link); return -ENODEV; diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index 431a20e..2fad4ac 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3656,10 +3656,7 @@ wv_pcmcia_reset(struct net_device * dev) i = pcmcia_access_configuration_register(link, ®); if (i != 0) - { - cs_error(link, AccessConfigurationRegister, i); return FALSE; - } #ifdef DEBUG_CONFIG_INFO printk(KERN_DEBUG "%s: wavelan_pcmcia_reset(): Config reg is 0x%x\n", @@ -3670,19 +3667,13 @@ wv_pcmcia_reset(struct net_device * dev) reg.Value = reg.Value | COR_SW_RESET; i = pcmcia_access_configuration_register(link, ®); if (i != 0) - { - cs_error(link, AccessConfigurationRegister, i); return FALSE; - } reg.Action = CS_WRITE; reg.Value = COR_LEVEL_IRQ | COR_CONFIG; i = pcmcia_access_configuration_register(link, ®); if (i != 0) - { - cs_error(link, AccessConfigurationRegister, i); return FALSE; - } #ifdef DEBUG_CONFIG_TRACE printk(KERN_DEBUG "%s: <-wv_pcmcia_reset()\n", dev->name); @@ -3857,10 +3848,7 @@ wv_pcmcia_config(struct pcmcia_device * link) { i = pcmcia_request_io(link, &link->io); if (i != 0) - { - cs_error(link, RequestIO, i); break; - } /* * Now allocate an interrupt line. Note that this does not @@ -3868,10 +3856,7 @@ wv_pcmcia_config(struct pcmcia_device * link) */ i = pcmcia_request_irq(link, &link->irq); if (i != 0) - { - cs_error(link, RequestIRQ, i); break; - } /* * This actually configures the PCMCIA socket -- setting up @@ -3880,10 +3865,7 @@ wv_pcmcia_config(struct pcmcia_device * link) link->conf.ConfigIndex = 1; i = pcmcia_request_configuration(link, &link->conf); if (i != 0) - { - cs_error(link, RequestConfiguration, i); break; - } /* * Allocate a small memory window. Note that the struct pcmcia_device @@ -3896,10 +3878,7 @@ wv_pcmcia_config(struct pcmcia_device * link) req.AccessSpeed = mem_speed; i = pcmcia_request_window(&link, &req, &link->win); if (i != 0) - { - cs_error(link, RequestWindow, i); break; - } lp->mem = ioremap(req.Base, req.Size); dev->mem_start = (u_long)lp->mem; @@ -3908,10 +3887,7 @@ wv_pcmcia_config(struct pcmcia_device * link) mem.CardOffset = 0; mem.Page = 0; i = pcmcia_map_mem_page(link->win, &mem); if (i != 0) - { - cs_error(link, MapMemPage, i); break; - } /* Feed device with this info... */ dev->irq = link->irq.AssignedIRQ; diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 4f1e0cf..7e8e269 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -67,23 +67,7 @@ /* For rough constant delay */ #define WL3501_NOPLOOP(n) { int x = 0; while (x++ < n) slow_down_io(); } -/* - * All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If you do not - * define PCMCIA_DEBUG at all, all the debug code will be left out. If you - * compile with PCMCIA_DEBUG=0, the debug code will be present but disabled -- - * but it can then be enabled for specific modules at load time with a - * 'pc_debug=#' option to insmod. - */ -#define PCMCIA_DEBUG 0 -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define dprintk(n, format, args...) \ - { if (pc_debug > (n)) \ - printk(KERN_INFO "%s: " format "\n", __func__ , ##args); } -#else -#define dprintk(n, format, args...) -#endif + #define wl3501_outb(a, b) { outb(a, b); slow_down_io(); } #define wl3501_outb_p(a, b) { outb_p(a, b); slow_down_io(); } @@ -684,10 +668,10 @@ static void wl3501_mgmt_scan_confirm(struct wl3501_card *this, u16 addr) int matchflag = 0; struct wl3501_scan_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) { - dprintk(3, "success"); + pr_debug("success"); if ((this->net_type == IW_MODE_INFRA && (sig.cap_info & WL3501_MGMT_CAPABILITY_ESS)) || (this->net_type == IW_MODE_ADHOC && @@ -722,7 +706,7 @@ static void wl3501_mgmt_scan_confirm(struct wl3501_card *this, u16 addr) } } } else if (sig.status == WL3501_STATUS_TIMEOUT) { - dprintk(3, "timeout"); + pr_debug("timeout"); this->join_sta_bss = 0; for (i = this->join_sta_bss; i < this->bss_cnt; i++) if (!wl3501_mgmt_join(this, i)) @@ -879,7 +863,7 @@ static int wl3501_mgmt_auth(struct wl3501_card *this) .timeout = 1000, }; - dprintk(3, "entry"); + pr_debug("entry"); memcpy(sig.mac_addr, this->bssid, ETH_ALEN); return wl3501_esbq_exec(this, &sig, sizeof(sig)); } @@ -893,7 +877,7 @@ static int wl3501_mgmt_association(struct wl3501_card *this) .cap_info = this->cap_info, }; - dprintk(3, "entry"); + pr_debug("entry"); memcpy(sig.mac_addr, this->bssid, ETH_ALEN); return wl3501_esbq_exec(this, &sig, sizeof(sig)); } @@ -903,7 +887,7 @@ static void wl3501_mgmt_join_confirm(struct net_device *dev, u16 addr) struct wl3501_card *this = netdev_priv(dev); struct wl3501_join_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) { if (this->net_type == IW_MODE_INFRA) { @@ -962,7 +946,7 @@ static inline void wl3501_md_confirm_interrupt(struct net_device *dev, { struct wl3501_md_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); wl3501_free_tx_buffer(this, sig.data); if (netif_queue_stopped(dev)) @@ -1017,7 +1001,7 @@ static inline void wl3501_md_ind_interrupt(struct net_device *dev, static inline void wl3501_get_confirm_interrupt(struct wl3501_card *this, u16 addr, void *sig, int size) { - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &this->sig_get_confirm, sizeof(this->sig_get_confirm)); wake_up(&this->wait); @@ -1029,7 +1013,7 @@ static inline void wl3501_start_confirm_interrupt(struct net_device *dev, { struct wl3501_start_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) netif_wake_queue(dev); @@ -1041,7 +1025,7 @@ static inline void wl3501_assoc_confirm_interrupt(struct net_device *dev, struct wl3501_card *this = netdev_priv(dev); struct wl3501_assoc_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) @@ -1053,7 +1037,7 @@ static inline void wl3501_auth_confirm_interrupt(struct wl3501_card *this, { struct wl3501_auth_confirm sig; - dprintk(3, "entry"); + pr_debug("entry"); wl3501_get_from_wla(this, addr, &sig, sizeof(sig)); if (sig.status == WL3501_STATUS_SUCCESS) @@ -1069,7 +1053,7 @@ static inline void wl3501_rx_interrupt(struct net_device *dev) u8 sig_id; struct wl3501_card *this = netdev_priv(dev); - dprintk(3, "entry"); + pr_debug("entry"); loop: morepkts = 0; if (!wl3501_esbq_confirm(this)) @@ -1302,7 +1286,7 @@ static int wl3501_reset(struct net_device *dev) wl3501_ack_interrupt(this); wl3501_unblock_interrupt(this); wl3501_mgmt_scan(this, 100); - dprintk(1, "%s: device reset", dev->name); + pr_debug("%s: device reset", dev->name); rc = 0; out: return rc; @@ -1376,7 +1360,7 @@ static int wl3501_open(struct net_device *dev) link->open++; /* Initial WL3501 firmware */ - dprintk(1, "%s: Initialize WL3501 firmware...", dev->name); + pr_debug("%s: Initialize WL3501 firmware...", dev->name); if (wl3501_init_firmware(this)) goto fail; /* Initial device variables */ @@ -1388,7 +1372,7 @@ static int wl3501_open(struct net_device *dev) wl3501_unblock_interrupt(this); wl3501_mgmt_scan(this, 100); rc = 0; - dprintk(1, "%s: WL3501 opened", dev->name); + pr_debug("%s: WL3501 opened", dev->name); printk(KERN_INFO "%s: Card Name: %s\n" "%s: Firmware Date: %s\n", dev->name, this->card_name, @@ -1945,9 +1929,6 @@ out_link: return -ENOMEM; } -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - /** * wl3501_config - configure the PCMCIA socket and make eth device available * @link - FILL_IN @@ -1959,7 +1940,7 @@ do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) static int wl3501_config(struct pcmcia_device *link) { struct net_device *dev = link->priv; - int i = 0, j, last_fn, last_ret; + int i = 0, j, ret; struct wl3501_card *this; /* Try allocating IO ports. This tries a few fixed addresses. If you @@ -1975,20 +1956,22 @@ static int wl3501_config(struct pcmcia_device *link) if (i == 0) break; } - if (i != 0) { - cs_error(link, RequestIO, i); + if (i != 0) goto failed; - } /* Now allocate an interrupt line. Note that this does not actually * assign a handler to the interrupt. */ - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; /* This actually configures the PCMCIA socket -- setting up the I/O * windows and the interrupt mapping. */ - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; @@ -2041,8 +2024,6 @@ static int wl3501_config(struct pcmcia_device *link) netif_start_queue(dev); return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: wl3501_release(link); return -ENODEV; -- cgit v0.10.2 From 3e7166178a83fef690dcbfcdaeda192f7282a9a4 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:54:14 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (scsi) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: linux-scsi@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c index 67cde01..4329e4e 100644 --- a/drivers/scsi/pcmcia/aha152x_stub.c +++ b/drivers/scsi/pcmcia/aha152x_stub.c @@ -54,15 +54,6 @@ #include #include -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"aha152x_cs.c 1.54 2000/06/12 21:27:25 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -103,7 +94,7 @@ static int aha152x_probe(struct pcmcia_device *link) { scsi_info_t *info; - DEBUG(0, "aha152x_attach()\n"); + dev_dbg(&link->dev, "aha152x_attach()\n"); /* Create new SCSI device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -127,7 +118,7 @@ static int aha152x_probe(struct pcmcia_device *link) static void aha152x_detach(struct pcmcia_device *link) { - DEBUG(0, "aha152x_detach(0x%p)\n", link); + dev_dbg(&link->dev, "aha152x_detach\n"); aha152x_release_cs(link); @@ -137,9 +128,6 @@ static void aha152x_detach(struct pcmcia_device *link) /*====================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int aha152x_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -164,19 +152,22 @@ static int aha152x_config_cs(struct pcmcia_device *link) { scsi_info_t *info = link->priv; struct aha152x_setup s; - int last_ret, last_fn; + int ret; struct Scsi_Host *host; - DEBUG(0, "aha152x_config(0x%p)\n", link); + dev_dbg(&link->dev, "aha152x_config\n"); - last_ret = pcmcia_loop_config(link, aha152x_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); - goto failed; - } + ret = pcmcia_loop_config(link, aha152x_config_check, NULL); + if (ret) + goto failed; - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* Set configuration options for the aha152x driver */ memset(&s, 0, sizeof(s)); @@ -194,7 +185,7 @@ static int aha152x_config_cs(struct pcmcia_device *link) host = aha152x_probe_one(&s); if (host == NULL) { printk(KERN_INFO "aha152x_cs: no SCSI devices found\n"); - goto cs_failed; + goto failed; } sprintf(info->node.dev_name, "scsi%d", host->host_no); @@ -203,8 +194,6 @@ static int aha152x_config_cs(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: aha152x_release_cs(link); return -ENODEV; diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c index 06254f4..5792b55 100644 --- a/drivers/scsi/pcmcia/fdomain_stub.c +++ b/drivers/scsi/pcmcia/fdomain_stub.c @@ -59,16 +59,6 @@ MODULE_AUTHOR("David Hinds "); MODULE_DESCRIPTION("Future Domain PCMCIA SCSI driver"); MODULE_LICENSE("Dual MPL/GPL"); -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"fdomain_cs.c 1.47 2001/10/13 00:08:52 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif - /*====================================================================*/ typedef struct scsi_info_t { @@ -86,7 +76,7 @@ static int fdomain_probe(struct pcmcia_device *link) { scsi_info_t *info; - DEBUG(0, "fdomain_attach()\n"); + dev_dbg(&link->dev, "fdomain_attach()\n"); /* Create new SCSI device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -111,7 +101,7 @@ static int fdomain_probe(struct pcmcia_device *link) static void fdomain_detach(struct pcmcia_device *link) { - DEBUG(0, "fdomain_detach(0x%p)\n", link); + dev_dbg(&link->dev, "fdomain_detach\n"); fdomain_release(link); @@ -120,9 +110,6 @@ static void fdomain_detach(struct pcmcia_device *link) /*====================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int fdomain_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -137,20 +124,22 @@ static int fdomain_config_check(struct pcmcia_device *p_dev, static int fdomain_config(struct pcmcia_device *link) { scsi_info_t *info = link->priv; - int last_ret, last_fn; + int ret; char str[22]; struct Scsi_Host *host; - DEBUG(0, "fdomain_config(0x%p)\n", link); + dev_dbg(&link->dev, "fdomain_config\n"); - last_ret = pcmcia_loop_config(link, fdomain_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, fdomain_config_check, NULL); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* A bad hack... */ release_region(link->io.BasePort1, link->io.NumPorts1); @@ -162,11 +151,11 @@ static int fdomain_config(struct pcmcia_device *link) host = __fdomain_16x0_detect(&fdomain_driver_template); if (!host) { printk(KERN_INFO "fdomain_cs: no SCSI devices found\n"); - goto cs_failed; + goto failed; } if (scsi_add_host(host, NULL)) - goto cs_failed; + goto failed; scsi_scan_host(host); sprintf(info->node.dev_name, "scsi%d", host->host_no); @@ -175,8 +164,6 @@ static int fdomain_config(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: fdomain_release(link); return -ENODEV; @@ -188,7 +175,7 @@ static void fdomain_release(struct pcmcia_device *link) { scsi_info_t *info = link->priv; - DEBUG(0, "fdomain_release(0x%p)\n", link); + dev_dbg(&link->dev, "fdomain_release\n"); scsi_remove_host(info->host); pcmcia_disable_device(link); diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c index 20c3e5e..65d7ad58 100644 --- a/drivers/scsi/pcmcia/qlogic_stub.c +++ b/drivers/scsi/pcmcia/qlogic_stub.c @@ -62,15 +62,6 @@ static char qlogic_name[] = "qlogic_cs"; -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "qlogic_cs.c 1.79-ac 2002/10/26 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif - static struct scsi_host_template qlogicfas_driver_template = { .module = THIS_MODULE, .name = qlogic_name, @@ -159,7 +150,7 @@ static int qlogic_probe(struct pcmcia_device *link) { scsi_info_t *info; - DEBUG(0, "qlogic_attach()\n"); + dev_dbg(&link->dev, "qlogic_attach()\n"); /* Create new SCSI device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -183,7 +174,7 @@ static int qlogic_probe(struct pcmcia_device *link) static void qlogic_detach(struct pcmcia_device *link) { - DEBUG(0, "qlogic_detach(0x%p)\n", link); + dev_dbg(&link->dev, "qlogic_detach\n"); qlogic_release(link); kfree(link->priv); @@ -192,9 +183,6 @@ static void qlogic_detach(struct pcmcia_device *link) /*====================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int qlogic_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -213,19 +201,22 @@ static int qlogic_config_check(struct pcmcia_device *p_dev, static int qlogic_config(struct pcmcia_device * link) { scsi_info_t *info = link->priv; - int last_ret, last_fn; + int ret; struct Scsi_Host *host; - DEBUG(0, "qlogic_config(0x%p)\n", link); + dev_dbg(&link->dev, "qlogic_config\n"); - last_ret = pcmcia_loop_config(link, qlogic_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, qlogic_config_check, NULL); + if (ret) + goto failed; + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; if ((info->manf_id == MANFID_MACNICA) || (info->manf_id == MANFID_PIONEER) || (info->manf_id == 0x0098)) { /* set ATAcmd */ @@ -244,7 +235,7 @@ static int qlogic_config(struct pcmcia_device * link) if (!host) { printk(KERN_INFO "%s: no SCSI devices found\n", qlogic_name); - goto cs_failed; + goto failed; } sprintf(info->node.dev_name, "scsi%d", host->host_no); @@ -253,12 +244,9 @@ static int qlogic_config(struct pcmcia_device * link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); - pcmcia_disable_device(link); failed: + pcmcia_disable_device(link); return -ENODEV; - } /* qlogic_config */ /*====================================================================*/ @@ -267,7 +255,7 @@ static void qlogic_release(struct pcmcia_device *link) { scsi_info_t *info = link->priv; - DEBUG(0, "qlogic_release(0x%p)\n", link); + dev_dbg(&link->dev, "qlogic_release\n"); scsi_remove_host(info->host); diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index b330c11..851a41c 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -77,17 +77,6 @@ #include #include -/* ================================================================== */ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"sym53c500_cs.c 0.9c 2004/10/27 (Bob Tracy)"; -#else -#define DEBUG(n, args...) -#endif /* ================================================================== */ @@ -525,7 +514,7 @@ SYM53C500_release(struct pcmcia_device *link) struct scsi_info_t *info = link->priv; struct Scsi_Host *shost = info->host; - DEBUG(0, "SYM53C500_release(0x%p)\n", link); + dev_dbg(&link->dev, "SYM53C500_release\n"); /* * Do this before releasing/freeing resources. @@ -697,9 +686,6 @@ static struct scsi_host_template sym53c500_driver_template = { .shost_attrs = SYM53C500_shost_attrs }; -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int SYM53C500_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -719,24 +705,27 @@ static int SYM53C500_config(struct pcmcia_device *link) { struct scsi_info_t *info = link->priv; - int last_ret, last_fn; + int ret; int irq_level, port_base; struct Scsi_Host *host; struct scsi_host_template *tpnt = &sym53c500_driver_template; struct sym53c500_data *data; - DEBUG(0, "SYM53C500_config(0x%p)\n", link); + dev_dbg(&link->dev, "SYM53C500_config\n"); info->manf_id = link->manf_id; - last_ret = pcmcia_loop_config(link, SYM53C500_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, SYM53C500_config_check, NULL); + if (ret) + goto failed; + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* * That's the trouble with copying liberally from another driver. @@ -824,8 +813,6 @@ err_release: printk(KERN_INFO "sym53c500_cs: no SCSI devices found\n"); return -ENODEV; -cs_failed: - cs_error(link, last_fn, last_ret); failed: SYM53C500_release(link); return -ENODEV; @@ -855,7 +842,7 @@ static int sym53c500_resume(struct pcmcia_device *link) static void SYM53C500_detach(struct pcmcia_device *link) { - DEBUG(0, "SYM53C500_detach(0x%p)\n", link); + dev_dbg(&link->dev, "SYM53C500_detach\n"); SYM53C500_release(link); @@ -868,7 +855,7 @@ SYM53C500_probe(struct pcmcia_device *link) { struct scsi_info_t *info; - DEBUG(0, "SYM53C500_attach()\n"); + dev_dbg(&link->dev, "SYM53C500_attach()\n"); /* Create new SCSI device */ info = kzalloc(sizeof(*info), GFP_KERNEL); -- cgit v0.10.2 From 9ec0bf41b5030ccc691049754ed1398cad5e953e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:54:46 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (serial_cs) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: linux-serial@vger.kernel.org CC: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index 7bf02cf..3b31bee 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -54,14 +54,6 @@ #include "8250.h" -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "serial_cs.c 1.134 2002/05/04 05:48:53 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -121,24 +113,20 @@ static void quirk_setup_brainboxes_0104(struct pcmcia_device *link, struct uart_ static int quirk_post_ibm(struct pcmcia_device *link) { conf_reg_t reg = { 0, CS_READ, 0x800, 0 }; - int last_ret, last_fn; + int ret; + + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; - last_ret = pcmcia_access_configuration_register(link, ®); - if (last_ret) { - last_fn = AccessConfigurationRegister; - goto cs_failed; - } reg.Action = CS_WRITE; reg.Value = reg.Value | 1; - last_ret = pcmcia_access_configuration_register(link, ®); - if (last_ret) { - last_fn = AccessConfigurationRegister; - goto cs_failed; - } + ret = pcmcia_access_configuration_register(link, ®); + if (ret) + goto failed; return 0; - cs_failed: - cs_error(link, last_fn, last_ret); + failed: return -ENODEV; } @@ -283,7 +271,7 @@ static void serial_remove(struct pcmcia_device *link) struct serial_info *info = link->priv; int i; - DEBUG(0, "serial_release(0x%p)\n", link); + dev_dbg(&link->dev, "serial_release\n"); /* * Recheck to see if the device is still configured. @@ -334,7 +322,7 @@ static int serial_probe(struct pcmcia_device *link) { struct serial_info *info; - DEBUG(0, "serial_attach()\n"); + dev_dbg(&link->dev, "serial_attach()\n"); /* Create new serial device */ info = kzalloc(sizeof (*info), GFP_KERNEL); @@ -370,7 +358,7 @@ static void serial_detach(struct pcmcia_device *link) { struct serial_info *info = link->priv; - DEBUG(0, "serial_detach(0x%p)\n", link); + dev_dbg(&link->dev, "serial_detach\n"); /* * Ensure any outstanding scheduled tasks are completed. @@ -507,15 +495,13 @@ static int simple_config(struct pcmcia_device *link) printk(KERN_NOTICE "serial_cs: no usable port range found, giving up\n"); - cs_error(link, RequestIO, i); return -1; found_port: i = pcmcia_request_irq(link, &link->irq); - if (i != 0) { - cs_error(link, RequestIRQ, i); + if (i != 0) link->irq.AssignedIRQ = 0; - } + if (info->multi && (info->manfid == MANFID_3COM)) link->conf.ConfigIndex &= ~(0x08); @@ -526,10 +512,8 @@ found_port: info->quirk->config(link); i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) return -1; - } return setup_serial(link, info, link->io.BasePort1, link->irq.AssignedIRQ); } @@ -598,7 +582,6 @@ static int multi_config(struct pcmcia_device *link) /* FIXME: comment does not fit, error handling does not fit */ printk(KERN_NOTICE "serial_cs: no usable port range found, giving up\n"); - cs_error(link, RequestIRQ, i); link->irq.AssignedIRQ = 0; } @@ -609,10 +592,8 @@ static int multi_config(struct pcmcia_device *link) info->quirk->config(link); i = pcmcia_request_configuration(link, &link->conf); - if (i != 0) { - cs_error(link, RequestConfiguration, i); + if (i != 0) return -ENODEV; - } /* The Oxford Semiconductor OXCF950 cards are in fact single-port: * 8 registers are for the UART, the others are extra registers. @@ -682,7 +663,7 @@ static int serial_config(struct pcmcia_device * link) struct serial_info *info = link->priv; int i; - DEBUG(0, "serial_config(0x%p)\n", link); + dev_dbg(&link->dev, "serial_config\n"); /* Is this a compliant multifunction card? */ info->multi = (link->socket->functions > 1); -- cgit v0.10.2 From 7c5af6ffd69bb2bb3c86b374153627529d67598c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:55:12 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (sound) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: Jaroslav Kysela CC: alsa-devel@alsa-project.org Signed-off-by: Dominik Brodowski diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c index 7dea74b..64b8599 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c @@ -217,20 +217,25 @@ static void snd_pdacf_detach(struct pcmcia_device *link) * configuration callback */ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int pdacf_config(struct pcmcia_device *link) { struct snd_pdacf *pdacf = link->priv; - int last_fn, last_ret; + int ret; snd_printdd(KERN_DEBUG "pdacf_config called\n"); link->conf.ConfigIndex = 0x5; - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_io(link, &link->io); + if (ret) + goto failed; + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; if (snd_pdacf_assign_resources(pdacf, link->io.BasePort1, link->irq.AssignedIRQ) < 0) goto failed; @@ -238,8 +243,6 @@ static int pdacf_config(struct pcmcia_device *link) link->dev_node = &pdacf->node; return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: pcmcia_disable_device(link); return -ENODEV; diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 7445cc8..1492744 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -213,14 +213,11 @@ static int snd_vxpocket_assign_resources(struct vx_core *chip, int port, int irq * configuration callback */ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int vxpocket_config(struct pcmcia_device *link) { struct vx_core *chip = link->priv; struct snd_vxpocket *vxp = (struct snd_vxpocket *)chip; - int last_fn, last_ret; + int ret; snd_printdd(KERN_DEBUG "vxpocket_config called\n"); @@ -235,9 +232,17 @@ static int vxpocket_config(struct pcmcia_device *link) strcpy(chip->card->driver, vxp440_hw.name); } - CS_CHECK(RequestIO, pcmcia_request_io(link, &link->io)); - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_io(link, &link->io); + if (ret) + goto failed; + + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; chip->dev = &handle_to_dev(link); snd_card_set_dev(chip->card, chip->dev); @@ -248,8 +253,6 @@ static int vxpocket_config(struct pcmcia_device *link) link->dev_node = &vxp->node; return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: pcmcia_disable_device(link); return -ENODEV; -- cgit v0.10.2 From 9b44de2015ff4a2ed1d56efedfcc72b917d356a6 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:55:39 +0200 Subject: pcmcia: use dynamic debug infrastructure, deprecate CS_CHECK (misc drivers) Convert PCMCIA drivers to use the dynamic debug infrastructure, instead of requiring manual settings of PCMCIA_DEBUG. Also, remove all usages of the CS_CHECK macro and replace them with proper Linux style calling and return value checking. The extra error reporting may be dropped, as the PCMCIA core already complains about any (non-driver-author) errors. CC: linux-mtd@lists.infradead.org CC: linux-usb@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index 5c3546e..b698dba 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -119,10 +119,8 @@ static caddr_t remap_window(struct map_info *map, unsigned long to) dev->offset, mrq.CardOffset); mrq.Page = 0; ret = pcmcia_map_mem_page(win, &mrq); - if (ret != 0) { - cs_error(dev->p_dev, MapMemPage, ret); + if (ret != 0) return NULL; - } dev->offset = mrq.CardOffset; } return dev->win_base + (to & (dev->win_size-1)); @@ -327,8 +325,6 @@ static void pcmciamtd_set_vpp(struct map_info *map, int on) DEBUG(2, "dev = %p on = %d vpp = %d\n", dev, on, dev->vpp); ret = pcmcia_modify_configuration(link, &mod); - if (ret != 0) - cs_error(link, ModifyConfiguration, ret); } @@ -490,16 +486,12 @@ static void card_settings(struct pcmciamtd_dev *dev, struct pcmcia_device *link, * MTD device available to the system. */ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int pcmciamtd_config(struct pcmcia_device *link) { struct pcmciamtd_dev *dev = link->priv; struct mtd_info *mtd = NULL; cs_status_t status; win_req_t req; - int last_ret = 0, last_fn = 0; int ret; int i; static char *probes[] = { "jedec_probe", "cfi_probe" }; @@ -586,7 +578,6 @@ static int pcmciamtd_config(struct pcmcia_device *link) DEBUG(2, "Setting Configuration"); ret = pcmcia_request_configuration(link, &link->conf); if (ret != 0) { - cs_error(link, RequestConfiguration, ret); if (dev->win_base) { iounmap(dev->win_base); dev->win_base = NULL; @@ -661,8 +652,7 @@ static int pcmciamtd_config(struct pcmcia_device *link) link->dev_node = &dev->node; return 0; - cs_failed: - cs_error(link, last_fn, last_ret); + failed: err("CS Error, exiting"); pcmciamtd_release(link); return -ENODEV; diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c index 8fdfa4f..e56a4de 100644 --- a/drivers/parport/parport_cs.c +++ b/drivers/parport/parport_cs.c @@ -67,14 +67,6 @@ MODULE_LICENSE("Dual MPL/GPL"); INT_MODULE_PARM(epp_mode, 1); -#ifdef PCMCIA_DEBUG -INT_MODULE_PARM(pc_debug, PCMCIA_DEBUG); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = -"parport_cs.c 1.29 2002/10/11 06:57:41 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif /*====================================================================*/ @@ -103,7 +95,7 @@ static int parport_probe(struct pcmcia_device *link) { parport_info_t *info; - DEBUG(0, "parport_attach()\n"); + dev_dbg(&link->dev, "parport_attach()\n"); /* Create new parport device */ info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -132,7 +124,7 @@ static int parport_probe(struct pcmcia_device *link) static void parport_detach(struct pcmcia_device *link) { - DEBUG(0, "parport_detach(0x%p)\n", link); + dev_dbg(&link->dev, "parport_detach\n"); parport_cs_release(link); @@ -147,9 +139,6 @@ static void parport_detach(struct pcmcia_device *link) ======================================================================*/ -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static int parport_config_check(struct pcmcia_device *p_dev, cistpl_cftable_entry_t *cfg, cistpl_cftable_entry_t *dflt, @@ -178,18 +167,20 @@ static int parport_config(struct pcmcia_device *link) { parport_info_t *info = link->priv; struct parport *p; - int last_ret, last_fn; + int ret; - DEBUG(0, "parport_config(0x%p)\n", link); + dev_dbg(&link->dev, "parport_config\n"); - last_ret = pcmcia_loop_config(link, parport_config_check, NULL); - if (last_ret) { - cs_error(link, RequestIO, last_ret); + ret = pcmcia_loop_config(link, parport_config_check, NULL); + if (ret) goto failed; - } - CS_CHECK(RequestIRQ, pcmcia_request_irq(link, &link->irq)); - CS_CHECK(RequestConfiguration, pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; p = parport_pc_probe_port(link->io.BasePort1, link->io.BasePort2, link->irq.AssignedIRQ, PARPORT_DMA_NONE, @@ -213,8 +204,6 @@ static int parport_config(struct pcmcia_device *link) return 0; -cs_failed: - cs_error(link, last_fn, last_ret); failed: parport_cs_release(link); return -ENODEV; @@ -232,7 +221,7 @@ static void parport_cs_release(struct pcmcia_device *link) { parport_info_t *info = link->priv; - DEBUG(0, "parport_release(0x%p)\n", link); + dev_dbg(&link->dev, "parport_release\n"); if (info->ndev) { struct parport *p = info->port; diff --git a/drivers/telephony/ixj_pcmcia.c b/drivers/telephony/ixj_pcmcia.c index 347c3ed..d442fd3 100644 --- a/drivers/telephony/ixj_pcmcia.c +++ b/drivers/telephony/ixj_pcmcia.c @@ -19,13 +19,6 @@ * PCMCIA service support for Quicknet cards */ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -#else -#define DEBUG(n, args...) -#endif typedef struct ixj_info_t { int ndev; @@ -39,7 +32,7 @@ static void ixj_cs_release(struct pcmcia_device * link); static int ixj_probe(struct pcmcia_device *p_dev) { - DEBUG(0, "ixj_attach()\n"); + dev_dbg(&p_dev->dev, "ixj_attach()\n"); /* Create new ixj device */ p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; p_dev->io.Attributes2 = IO_DATA_PATH_WIDTH_8; @@ -55,33 +48,30 @@ static int ixj_probe(struct pcmcia_device *p_dev) static void ixj_detach(struct pcmcia_device *link) { - DEBUG(0, "ixj_detach(0x%p)\n", link); + dev_dbg(&link->dev, "ixj_detach\n"); ixj_cs_release(link); kfree(link->priv); } -#define CS_CHECK(fn, ret) \ -do { last_fn = (fn); if ((last_ret = (ret)) != 0) goto cs_failed; } while (0) - static void ixj_get_serial(struct pcmcia_device * link, IXJ * j) { char *str; int i, place; - DEBUG(0, "ixj_get_serial(0x%p)\n", link); + dev_dbg(&link->dev, "ixj_get_serial\n"); str = link->prod_id[0]; if (!str) - goto cs_failed; + goto failed; printk("%s", str); str = link->prod_id[1]; if (!str) - goto cs_failed; + goto failed; printk(" %s", str); str = link->prod_id[2]; if (!str) - goto cs_failed; + goto failed; place = 1; for (i = strlen(str) - 1; i >= 0; i--) { switch (str[i]) { @@ -118,9 +108,9 @@ static void ixj_get_serial(struct pcmcia_device * link, IXJ * j) } str = link->prod_id[3]; if (!str) - goto cs_failed; + goto failed; printk(" version %s\n", str); - cs_failed: +failed: return; } @@ -151,13 +141,13 @@ static int ixj_config(struct pcmcia_device * link) cistpl_cftable_entry_t dflt = { 0 }; info = link->priv; - DEBUG(0, "ixj_config(0x%p)\n", link); + dev_dbg(&link->dev, "ixj_config\n"); if (pcmcia_loop_config(link, ixj_config_check, &dflt)) - goto cs_failed; + goto failed; if (pcmcia_request_configuration(link, &link->conf)) - goto cs_failed; + goto failed; /* * Register the card with the core. @@ -170,7 +160,7 @@ static int ixj_config(struct pcmcia_device * link) ixj_get_serial(link, j); return 0; - cs_failed: +failed: ixj_cs_release(link); return -ENODEV; } @@ -178,7 +168,7 @@ static int ixj_config(struct pcmcia_device * link) static void ixj_cs_release(struct pcmcia_device *link) { ixj_info_t *info = link->priv; - DEBUG(0, "ixj_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "ixj_cs_release\n"); info->ndev = 0; pcmcia_disable_device(link); } diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c index 516848d..4607742 100644 --- a/drivers/usb/host/sl811_cs.c +++ b/drivers/usb/host/sl811_cs.c @@ -37,28 +37,8 @@ MODULE_LICENSE("GPL"); /* MACROS */ /*====================================================================*/ -#if defined(DEBUG) || defined(PCMCIA_DEBUG) - -static int pc_debug = 0; -module_param(pc_debug, int, 0644); - -#define DBG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG "sl811_cs: " args) - -#else -#define DBG(n, args...) do{}while(0) -#endif /* no debugging */ - #define INFO(args...) printk(KERN_INFO "sl811_cs: " args) -#define INT_MODULE_PARM(n, v) static int n = v; module_param(n, int, 0444) - -#define CS_CHECK(fn, ret) \ - do { \ - last_fn = (fn); \ - if ((last_ret = (ret)) != 0) \ - goto cs_failed; \ - } while (0) - /*====================================================================*/ /* VARIABLES */ /*====================================================================*/ @@ -76,7 +56,7 @@ static void sl811_cs_release(struct pcmcia_device * link); static void release_platform_dev(struct device * dev) { - DBG(0, "sl811_cs platform_dev release\n"); + dev_dbg(dev, "sl811_cs platform_dev release\n"); dev->parent = NULL; } @@ -140,7 +120,7 @@ static int sl811_hc_init(struct device *parent, resource_size_t base_addr, static void sl811_cs_detach(struct pcmcia_device *link) { - DBG(0, "sl811_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "sl811_cs_detach\n"); sl811_cs_release(link); @@ -150,7 +130,7 @@ static void sl811_cs_detach(struct pcmcia_device *link) static void sl811_cs_release(struct pcmcia_device * link) { - DBG(0, "sl811_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "sl811_cs_release\n"); pcmcia_disable_device(link); platform_device_unregister(&platform_dev); @@ -207,9 +187,9 @@ static int sl811_cs_config(struct pcmcia_device *link) { struct device *parent = &handle_to_dev(link); local_info_t *dev = link->priv; - int last_fn, last_ret; + int ret; - DBG(0, "sl811_cs_config(0x%p)\n", link); + dev_dbg(&link->dev, "sl811_cs_config\n"); if (pcmcia_loop_config(link, sl811_cs_config_check, NULL)) goto failed; @@ -217,14 +197,16 @@ static int sl811_cs_config(struct pcmcia_device *link) /* require an IRQ and two registers */ if (!link->io.NumPorts1 || link->io.NumPorts1 < 2) goto failed; - if (link->conf.Attributes & CONF_ENABLE_IRQ) - CS_CHECK(RequestIRQ, - pcmcia_request_irq(link, &link->irq)); - else + if (link->conf.Attributes & CONF_ENABLE_IRQ) { + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; + } else goto failed; - CS_CHECK(RequestConfiguration, - pcmcia_request_configuration(link, &link->conf)); + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; sprintf(dev->node.dev_name, driver_name); dev->node.major = dev->node.minor = 0; @@ -241,8 +223,6 @@ static int sl811_cs_config(struct pcmcia_device *link) if (sl811_hc_init(parent, link->io.BasePort1, link->irq.AssignedIRQ) < 0) { -cs_failed: - cs_error(link, last_fn, last_ret); failed: printk(KERN_WARNING "sl811_cs_config failed\n"); sl811_cs_release(link); -- cgit v0.10.2 From 9cb495bb41f07a3ebfc60d3b9d26017a1fd7050c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 15:57:22 +0200 Subject: pcmcia: remove now-defunct cs_error, pcmcia_error_{func,ret} As all in-tree drivers have been converted to not use cs_error() any more, drop these functions and definitions, and update the Documentation. Signed-off-by: Dominik Brodowski diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt index adfb83e..446f43b 100644 --- a/Documentation/pcmcia/driver-changes.txt +++ b/Documentation/pcmcia/driver-changes.txt @@ -1,5 +1,10 @@ This file details changes in 2.6 which affect PCMCIA card driver authors: +* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33) + Instead of the cs_error() callback or the CS_CHECK() macro, please use + Linux-style checking of return values, and -- if necessary -- debug + messages using "dev_dbg()" or "pr_debug()". + * New CIS tuple access (as of 2.6.33) Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 5b069ae..05893d4 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -46,107 +46,6 @@ spinlock_t pcmcia_dev_list_lock; /*====================================================================*/ -/* code which was in cs.c before */ - -/* String tables for error messages */ - -typedef struct lookup_t { - const int key; - const char *msg; -} lookup_t; - -static const lookup_t error_table[] = { - { 0, "Operation succeeded" }, - { -EIO, "Input/Output error" }, - { -ENODEV, "No card present" }, - { -EINVAL, "Bad parameter" }, - { -EACCES, "Configuration locked" }, - { -EBUSY, "Resource in use" }, - { -ENOSPC, "No more items" }, - { -ENOMEM, "Out of resource" }, -}; - - -static const lookup_t service_table[] = { - { AccessConfigurationRegister, "AccessConfigurationRegister" }, - { AddSocketServices, "AddSocketServices" }, - { AdjustResourceInfo, "AdjustResourceInfo" }, - { CheckEraseQueue, "CheckEraseQueue" }, - { CloseMemory, "CloseMemory" }, - { DeregisterClient, "DeregisterClient" }, - { DeregisterEraseQueue, "DeregisterEraseQueue" }, - { GetCardServicesInfo, "GetCardServicesInfo" }, - { GetClientInfo, "GetClientInfo" }, - { GetConfigurationInfo, "GetConfigurationInfo" }, - { GetEventMask, "GetEventMask" }, - { GetFirstClient, "GetFirstClient" }, - { GetFirstRegion, "GetFirstRegion" }, - { GetFirstTuple, "GetFirstTuple" }, - { GetNextClient, "GetNextClient" }, - { GetNextRegion, "GetNextRegion" }, - { GetNextTuple, "GetNextTuple" }, - { GetStatus, "GetStatus" }, - { GetTupleData, "GetTupleData" }, - { MapMemPage, "MapMemPage" }, - { ModifyConfiguration, "ModifyConfiguration" }, - { ModifyWindow, "ModifyWindow" }, - { OpenMemory, "OpenMemory" }, - { ParseTuple, "ParseTuple" }, - { ReadMemory, "ReadMemory" }, - { RegisterClient, "RegisterClient" }, - { RegisterEraseQueue, "RegisterEraseQueue" }, - { RegisterMTD, "RegisterMTD" }, - { ReleaseConfiguration, "ReleaseConfiguration" }, - { ReleaseIO, "ReleaseIO" }, - { ReleaseIRQ, "ReleaseIRQ" }, - { ReleaseWindow, "ReleaseWindow" }, - { RequestConfiguration, "RequestConfiguration" }, - { RequestIO, "RequestIO" }, - { RequestIRQ, "RequestIRQ" }, - { RequestSocketMask, "RequestSocketMask" }, - { RequestWindow, "RequestWindow" }, - { ResetCard, "ResetCard" }, - { SetEventMask, "SetEventMask" }, - { ValidateCIS, "ValidateCIS" }, - { WriteMemory, "WriteMemory" }, - { BindDevice, "BindDevice" }, - { BindMTD, "BindMTD" }, - { ReportError, "ReportError" }, - { SuspendCard, "SuspendCard" }, - { ResumeCard, "ResumeCard" }, - { EjectCard, "EjectCard" }, - { InsertCard, "InsertCard" }, - { ReplaceCIS, "ReplaceCIS" } -}; - -const char *pcmcia_error_func(int func) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(service_table); i++) - if (service_table[i].key == func) - return service_table[i].msg; - - return "Unknown service number"; -} -EXPORT_SYMBOL(pcmcia_error_func); - -const char *pcmcia_error_ret(int ret) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(error_table); i++) - if (error_table[i].key == ret) - return error_table[i].msg; - - return "unknown"; -} -EXPORT_SYMBOL(pcmcia_error_ret); - -/*======================================================================*/ - - - static void pcmcia_check_driver(struct pcmcia_driver *p_drv) { struct pcmcia_device_id *did = p_drv->id_table; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 6c37d4e..d82392d 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -142,42 +142,6 @@ struct pcmcia_device { #define handle_to_dev(handle) (handle->dev) -/* (deprecated) error reporting by PCMCIA devices. Use dev_printk() - * or dev_dbg() directly in the driver, without referring to pcmcia_error_func() - * and/or pcmcia_error_ret() for those functions will go away soon. - */ -enum service { - AccessConfigurationRegister, AddSocketServices, - AdjustResourceInfo, CheckEraseQueue, CloseMemory, CopyMemory, - DeregisterClient, DeregisterEraseQueue, GetCardServicesInfo, - GetClientInfo, GetConfigurationInfo, GetEventMask, - GetFirstClient, GetFirstPartion, GetFirstRegion, GetFirstTuple, - GetNextClient, GetNextPartition, GetNextRegion, GetNextTuple, - GetStatus, GetTupleData, MapLogSocket, MapLogWindow, MapMemPage, - MapPhySocket, MapPhyWindow, ModifyConfiguration, ModifyWindow, - OpenMemory, ParseTuple, ReadMemory, RegisterClient, - RegisterEraseQueue, RegisterMTD, RegisterTimer, - ReleaseConfiguration, ReleaseExclusive, ReleaseIO, ReleaseIRQ, - ReleaseSocketMask, ReleaseWindow, ReplaceSocketServices, - RequestConfiguration, RequestExclusive, RequestIO, RequestIRQ, - RequestSocketMask, RequestWindow, ResetCard, ReturnSSEntry, - SetEventMask, SetRegion, ValidateCIS, VendorSpecific, - WriteMemory, BindDevice, BindMTD, ReportError, - SuspendCard, ResumeCard, EjectCard, InsertCard, ReplaceCIS, - GetFirstWindow, GetNextWindow, GetMemPage -}; -const char *pcmcia_error_func(int func); -const char *pcmcia_error_ret(int ret); - -#define cs_error(p_dev, func, ret) \ - { \ - dev_printk(KERN_NOTICE, &p_dev->dev, \ - "%s : %s\n", \ - pcmcia_error_func(func), \ - pcmcia_error_ret(ret)); \ - } - - /* * CIS access. * -- cgit v0.10.2 From a7149f9a26eb44a5658d56335c23104ba529e9f6 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 24 Oct 2009 18:07:16 +0200 Subject: pcmcia: use dev_dbg and dev_print in pd6729.c As suggested by Wolfram Sang , use dev_dbg(), and dev_{err,warn,info}() in pd6729.c, and add some "\n" suggested by Komuro . In the ISR, use pr_devel() and dev_vdbg() as they are only compiled if DEBUG (or, for dev_vdbg(), VERBOSE_DEBUG) are set explicitly. CC: Komuro Acked-by: Wolfram Sang Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/pd6729.c b/drivers/pcmcia/pd6729.c index 70a3346..e1741cd 100644 --- a/drivers/pcmcia/pd6729.c +++ b/drivers/pcmcia/pd6729.c @@ -213,7 +213,8 @@ static irqreturn_t pd6729_interrupt(int irq, void *dev) if (csc & I365_CSC_DETECT) { events |= SS_DETECT; - dprintk("Card detected in socket %i!\n", i); + dev_vdbg(&socket[i].socket.dev, + "Card detected in socket %i!\n", i); } if (indirect_read(&socket[i], I365_INTCTL) @@ -331,11 +332,11 @@ static int pd6729_set_socket(struct pcmcia_socket *sock, socket_state_t *state) reg = I365_PWR_NORESET; /* default: disable resetdrv on resume */ if (state->flags & SS_PWR_AUTO) { - dprintk("Auto power\n"); + dev_dbg(&sock->dev, "Auto power\n"); reg |= I365_PWR_AUTO; /* automatic power mngmnt */ } if (state->flags & SS_OUTPUT_ENA) { - dprintk("Power Enabled\n"); + dev_dbg(&sock->dev, "Power Enabled\n"); reg |= I365_PWR_OUT; /* enable power */ } @@ -343,40 +344,44 @@ static int pd6729_set_socket(struct pcmcia_socket *sock, socket_state_t *state) case 0: break; case 33: - dprintk("setting voltage to Vcc to 3.3V on socket %i\n", + dev_dbg(&sock->dev, + "setting voltage to Vcc to 3.3V on socket %i\n", socket->number); reg |= I365_VCC_5V; indirect_setbit(socket, PD67_MISC_CTL_1, PD67_MC1_VCC_3V); break; case 50: - dprintk("setting voltage to Vcc to 5V on socket %i\n", + dev_dbg(&sock->dev, + "setting voltage to Vcc to 5V on socket %i\n", socket->number); reg |= I365_VCC_5V; indirect_resetbit(socket, PD67_MISC_CTL_1, PD67_MC1_VCC_3V); break; default: - dprintk("pd6729: pd6729_set_socket called with " - "invalid VCC power value: %i\n", - state->Vcc); + dev_dbg(&sock->dev, + "pd6729_set_socket called with invalid VCC power " + "value: %i\n", state->Vcc); return -EINVAL; } switch (state->Vpp) { case 0: - dprintk("not setting Vpp on socket %i\n", socket->number); + dev_dbg(&sock->dev, "not setting Vpp on socket %i\n", + socket->number); break; case 33: case 50: - dprintk("setting Vpp to Vcc for socket %i\n", socket->number); + dev_dbg(&sock->dev, "setting Vpp to Vcc for socket %i\n", + socket->number); reg |= I365_VPP1_5V; break; case 120: - dprintk("setting Vpp to 12.0\n"); + dev_dbg(&sock->dev, "setting Vpp to 12.0\n"); reg |= I365_VPP1_12V; break; default: - dprintk("pd6729: pd6729_set_socket called with invalid VPP power value: %i\n", - state->Vpp); + dev_dbg(&sock->dev, "pd6729: pd6729_set_socket called with " + "invalid VPP power value: %i\n", state->Vpp); return -EINVAL; } @@ -438,7 +443,7 @@ static int pd6729_set_io_map(struct pcmcia_socket *sock, /* Check error conditions */ if (map > 1) { - dprintk("pd6729_set_io_map with invalid map"); + dev_dbg(&sock->dev, "pd6729_set_io_map with invalid map\n"); return -EINVAL; } @@ -446,7 +451,7 @@ static int pd6729_set_io_map(struct pcmcia_socket *sock, if (indirect_read(socket, I365_ADDRWIN) & I365_ENA_IO(map)) indirect_resetbit(socket, I365_ADDRWIN, I365_ENA_IO(map)); - /* dprintk("set_io_map: Setting range to %x - %x\n", + /* dev_dbg(&sock->dev, "set_io_map: Setting range to %x - %x\n", io->start, io->stop);*/ /* write the new values */ @@ -478,12 +483,12 @@ static int pd6729_set_mem_map(struct pcmcia_socket *sock, map = mem->map; if (map > 4) { - printk("pd6729_set_mem_map: invalid map"); + dev_warn(&sock->dev, "invalid map requested\n"); return -EINVAL; } if ((mem->res->start > mem->res->end) || (mem->speed > 1000)) { - printk("pd6729_set_mem_map: invalid address / speed"); + dev_warn(&sock->dev, "invalid invalid address / speed\n"); return -EINVAL; } @@ -529,12 +534,12 @@ static int pd6729_set_mem_map(struct pcmcia_socket *sock, if (mem->flags & MAP_WRPROT) i |= I365_MEM_WRPROT; if (mem->flags & MAP_ATTRIB) { - /* dprintk("requesting attribute memory for socket %i\n", - socket->number);*/ + /* dev_dbg(&sock->dev, "requesting attribute memory for " + "socket %i\n", socket->number);*/ i |= I365_MEM_REG; } else { - /* dprintk("requesting normal memory for socket %i\n", - socket->number);*/ + /* dev_dbg(&sock->dev, "requesting normal memory for " + "socket %i\n", socket->number);*/ } indirect_write16(socket, base + I365_W_OFF, i); @@ -577,7 +582,7 @@ static struct pccard_operations pd6729_operations = { static irqreturn_t pd6729_test(int irq, void *dev) { - dprintk("-> hit on irq %d\n", irq); + pr_devel("-> hit on irq %d\n", irq); return IRQ_HANDLED; } @@ -642,13 +647,13 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, goto err_out_free_mem; if (!pci_resource_start(dev, 0)) { - printk(KERN_INFO "pd6729: refusing to load the driver " - "as the io_base is 0.\n"); + dev_warn(&dev->dev, "refusing to load the driver as the " + "io_base is NULL.\n"); goto err_out_free_mem; } - printk(KERN_INFO "pd6729: Cirrus PD6729 PCI to PCMCIA Bridge " - "at 0x%llx on irq %d\n", + dev_info(&dev->dev, "Cirrus PD6729 PCI to PCMCIA Bridge at 0x%llx " + "on irq %d\n", (unsigned long long)pci_resource_start(dev, 0), dev->irq); /* * Since we have no memory BARs some firmware may not @@ -656,14 +661,14 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, */ pci_read_config_byte(dev, PCI_COMMAND, &configbyte); if (!(configbyte & PCI_COMMAND_MEMORY)) { - printk(KERN_DEBUG "pd6729: Enabling PCI_COMMAND_MEMORY.\n"); + dev_dbg(&dev->dev, "pd6729: Enabling PCI_COMMAND_MEMORY.\n"); configbyte |= PCI_COMMAND_MEMORY; pci_write_config_byte(dev, PCI_COMMAND, configbyte); } ret = pci_request_regions(dev, "pd6729"); if (ret) { - printk(KERN_INFO "pd6729: pci request region failed.\n"); + dev_warn(&dev->dev, "pci request region failed.\n"); goto err_out_disable; } @@ -672,7 +677,7 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, mask = pd6729_isa_scan(); if (irq_mode == 0 && mask == 0) { - printk(KERN_INFO "pd6729: no ISA interrupt is available.\n"); + dev_warn(&dev->dev, "no ISA interrupt is available.\n"); goto err_out_free_res; } @@ -697,8 +702,8 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, /* Register the interrupt handler */ if ((ret = request_irq(dev->irq, pd6729_interrupt, IRQF_SHARED, "pd6729", socket))) { - printk(KERN_ERR "pd6729: Failed to register irq %d, " - "aborting\n", dev->irq); + dev_err(&dev->dev, "Failed to register irq %d\n", + dev->irq); goto err_out_free_res; } } else { @@ -713,8 +718,7 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, for (i = 0; i < MAX_SOCKETS; i++) { ret = pcmcia_register_socket(&socket[i].socket); if (ret) { - printk(KERN_INFO "pd6729: pcmcia_register_socket " - "failed.\n"); + dev_warn(&dev->dev, "pcmcia_register_socket failed.\n"); for (j = 0; j < i ; j++) pcmcia_unregister_socket(&socket[j].socket); goto err_out_free_res2; diff --git a/drivers/pcmcia/pd6729.h b/drivers/pcmcia/pd6729.h index f392e45..41418d3 100644 --- a/drivers/pcmcia/pd6729.h +++ b/drivers/pcmcia/pd6729.h @@ -1,13 +1,6 @@ #ifndef _INCLUDE_GUARD_PD6729_H_ #define _INCLUDE_GUARD_PD6729_H_ -/* Debuging defines */ -#ifdef NOTRACE -#define dprintk(fmt, args...) printk(fmt , ## args) -#else -#define dprintk(fmt, args...) do {} while (0) -#endif - /* Flags for I365_GENCTL */ #define I365_DF_VS1 0x40 /* DF-step Voltage Sense */ #define I365_DF_VS2 0x80 -- cgit v0.10.2 From 0f767de6a26a07f7d58394512b6f6c96322f047f Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 26 Mar 2009 21:14:19 +0000 Subject: PCMCIA: soc_common: convert to a stand alone module Convert soc_common.c to be a stand alone module, rather than wrapping it up into the individual SoC specific base modules. In doing this, we need to add init/exit functions for soc_common to register/remove the cpufreq notifier. Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index 0c44ddd..f3ccbcc 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -178,9 +178,13 @@ config PCMCIA_BCM63XX tristate "bcm63xx pcmcia support" depends on BCM63XX && PCMCIA +config PCMCIA_SOC_COMMON + bool + config PCMCIA_SA1100 tristate "SA1100 support" depends on ARM && ARCH_SA1100 && PCMCIA + select PCMCIA_SOC_COMMON help Say Y here to include support for SA11x0-based PCMCIA or CF sockets, found on HP iPAQs, Yopy, and other StrongARM(R)/ @@ -191,6 +195,7 @@ config PCMCIA_SA1100 config PCMCIA_SA1111 tristate "SA1111 support" depends on ARM && ARCH_SA1100 && SA1111 && PCMCIA + select PCMCIA_SOC_COMMON help Say Y here to include support for SA1111-based PCMCIA or CF sockets, found on the Jornada 720, Graphicsmaster and other @@ -204,6 +209,7 @@ config PCMCIA_PXA2XX depends on (ARCH_LUBBOCK || MACH_MAINSTONE || PXA_SHARPSL \ || MACH_ARMCORE || ARCH_PXA_PALM || TRIZEPS_PCMCIA \ || ARCH_VIPER || ARCH_PXA_ESERIES || MACH_STARGATE2) + select PCMCIA_SOC_COMMON help Say Y here to include support for the PXA2xx PCMCIA controller diff --git a/drivers/pcmcia/Makefile b/drivers/pcmcia/Makefile index a03a38a..3829383 100644 --- a/drivers/pcmcia/Makefile +++ b/drivers/pcmcia/Makefile @@ -22,8 +22,9 @@ obj-$(CONFIG_I82365) += i82365.o obj-$(CONFIG_I82092) += i82092.o obj-$(CONFIG_TCIC) += tcic.o obj-$(CONFIG_PCMCIA_M8XX) += m8xx_pcmcia.o -obj-$(CONFIG_PCMCIA_SA1100) += sa11xx_core.o sa1100_cs.o -obj-$(CONFIG_PCMCIA_SA1111) += sa11xx_core.o sa1111_cs.o +obj-$(CONFIG_PCMCIA_SOC_COMMON) += soc_common.o +obj-$(CONFIG_PCMCIA_SA1100) += sa11xx_base.o sa1100_cs.o +obj-$(CONFIG_PCMCIA_SA1111) += sa11xx_base.o sa1111_cs.o obj-$(CONFIG_M32R_PCC) += m32r_pcc.o obj-$(CONFIG_M32R_CFC) += m32r_cfc.o obj-$(CONFIG_PCMCIA_AU1X00) += au1x00_ss.o @@ -35,9 +36,6 @@ obj-$(CONFIG_BFIN_CFPCMCIA) += bfin_cf_pcmcia.o obj-$(CONFIG_AT91_CF) += at91_cf.o obj-$(CONFIG_ELECTRA_CF) += electra_cf.o -sa11xx_core-y += soc_common.o sa11xx_base.o -pxa2xx_core-y += soc_common.o pxa2xx_base.o - au1x00_ss-y += au1000_generic.o au1x00_ss-$(CONFIG_MIPS_PB1000) += au1000_pb1x00.o au1x00_ss-$(CONFIG_MIPS_PB1100) += au1000_pb1x00.o @@ -77,4 +75,4 @@ pxa2xx-obj-$(CONFIG_MACH_PALMLD) += pxa2xx_palmld.o pxa2xx-obj-$(CONFIG_MACH_E740) += pxa2xx_e740.o pxa2xx-obj-$(CONFIG_MACH_STARGATE2) += pxa2xx_stargate2.o -obj-$(CONFIG_PCMCIA_PXA2XX) += pxa2xx_core.o $(pxa2xx-obj-y) +obj-$(CONFIG_PCMCIA_PXA2XX) += pxa2xx_base.o $(pxa2xx-obj-y) diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index ef7e9e5..a2d448f 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -609,15 +609,14 @@ static int soc_pcmcia_cpufreq_register(void) "notifier for PCMCIA (%d)\n", ret); return ret; } +fs_initcall(soc_pcmcia_cpufreq_register); static void soc_pcmcia_cpufreq_unregister(void) { cpufreq_unregister_notifier(&soc_pcmcia_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); } +module_exit(soc_pcmcia_cpufreq_unregister); -#else -static int soc_pcmcia_cpufreq_register(void) { return 0; } -static void soc_pcmcia_cpufreq_unregister(void) {} #endif int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, @@ -668,9 +667,6 @@ int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops goto out_err_5; } - if (list_empty(&soc_pcmcia_sockets)) - soc_pcmcia_cpufreq_register(); - list_add(&skt->node, &soc_pcmcia_sockets); /* @@ -743,6 +739,7 @@ int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops mutex_unlock(&soc_pcmcia_sockets_lock); return ret; } +EXPORT_SYMBOL(soc_common_drv_pcmcia_probe); int soc_common_drv_pcmcia_remove(struct device *dev) { @@ -773,9 +770,6 @@ int soc_common_drv_pcmcia_remove(struct device *dev) release_resource(&skt->res_io); release_resource(&skt->res_skt); } - if (list_empty(&soc_pcmcia_sockets)) - soc_pcmcia_cpufreq_unregister(); - mutex_unlock(&soc_pcmcia_sockets_lock); kfree(sinfo); @@ -783,3 +777,7 @@ int soc_common_drv_pcmcia_remove(struct device *dev) return 0; } EXPORT_SYMBOL(soc_common_drv_pcmcia_remove); + +MODULE_AUTHOR("John Dorsey "); +MODULE_DESCRIPTION("Linux PCMCIA Card Services: Common SoC support"); +MODULE_LICENSE("Dual MPL/GPL"); -- cgit v0.10.2 From 097e296d6175881eba7244de7222de61e9569911 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 26 Mar 2009 21:45:05 +0000 Subject: PCMCIA: soc_common: provide single socket add/remove functionality Factor out the functionality for adding and removing a single socket, thereby allowing SoCs to individually register each socket. The advantage of this approach is that SoCs can then extend soc_pcmcia_socket as they wish. Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index a2d448f..9bfa74a 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -574,7 +574,7 @@ void soc_pcmcia_enable_irqs(struct soc_pcmcia_socket *skt, EXPORT_SYMBOL(soc_pcmcia_enable_irqs); -LIST_HEAD(soc_pcmcia_sockets); +static LIST_HEAD(soc_pcmcia_sockets); static DEFINE_MUTEX(soc_pcmcia_sockets_lock); #ifdef CONFIG_CPU_FREQ @@ -619,158 +619,174 @@ module_exit(soc_pcmcia_cpufreq_unregister); #endif -int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, - struct skt_dev_info *sinfo) +void soc_pcmcia_remove_one(struct soc_pcmcia_socket *skt) { - struct soc_pcmcia_socket *skt; - int ret, i; - mutex_lock(&soc_pcmcia_sockets_lock); + del_timer_sync(&skt->poll_timer); - /* - * Initialise the per-socket structure. - */ - for (i = 0; i < sinfo->nskt; i++) { - skt = &sinfo->skt[i]; + pcmcia_unregister_socket(&skt->socket); - skt->socket.ops = &soc_common_pcmcia_operations; - skt->socket.owner = ops->owner; - skt->socket.dev.parent = dev; + flush_scheduled_work(); - init_timer(&skt->poll_timer); - skt->poll_timer.function = soc_common_pcmcia_poll_event; - skt->poll_timer.data = (unsigned long)skt; - skt->poll_timer.expires = jiffies + SOC_PCMCIA_POLL_PERIOD; + skt->ops->hw_shutdown(skt); - skt->dev = dev; - skt->ops = ops; + soc_common_pcmcia_config_skt(skt, &dead_socket); - ret = request_resource(&iomem_resource, &skt->res_skt); - if (ret) - goto out_err_1; + list_del(&skt->node); + mutex_unlock(&soc_pcmcia_sockets_lock); - ret = request_resource(&skt->res_skt, &skt->res_io); - if (ret) - goto out_err_2; + iounmap(skt->virt_io); + skt->virt_io = NULL; + release_resource(&skt->res_attr); + release_resource(&skt->res_mem); + release_resource(&skt->res_io); + release_resource(&skt->res_skt); +} +EXPORT_SYMBOL(soc_pcmcia_remove_one); - ret = request_resource(&skt->res_skt, &skt->res_mem); - if (ret) - goto out_err_3; +int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt) +{ + int ret; - ret = request_resource(&skt->res_skt, &skt->res_attr); - if (ret) - goto out_err_4; + init_timer(&skt->poll_timer); + skt->poll_timer.function = soc_common_pcmcia_poll_event; + skt->poll_timer.data = (unsigned long)skt; + skt->poll_timer.expires = jiffies + SOC_PCMCIA_POLL_PERIOD; - skt->virt_io = ioremap(skt->res_io.start, 0x10000); - if (skt->virt_io == NULL) { - ret = -ENOMEM; - goto out_err_5; - } + ret = request_resource(&iomem_resource, &skt->res_skt); + if (ret) + goto out_err_1; - list_add(&skt->node, &soc_pcmcia_sockets); + ret = request_resource(&skt->res_skt, &skt->res_io); + if (ret) + goto out_err_2; - /* - * We initialize default socket timing here, because - * we are not guaranteed to see a SetIOMap operation at - * runtime. - */ - ops->set_timing(skt); + ret = request_resource(&skt->res_skt, &skt->res_mem); + if (ret) + goto out_err_3; - ret = ops->hw_init(skt); - if (ret) - goto out_err_6; + ret = request_resource(&skt->res_skt, &skt->res_attr); + if (ret) + goto out_err_4; - skt->socket.features = SS_CAP_STATIC_MAP|SS_CAP_PCCARD; - skt->socket.resource_ops = &pccard_static_ops; - skt->socket.irq_mask = 0; - skt->socket.map_size = PAGE_SIZE; - skt->socket.pci_irq = skt->irq; - skt->socket.io_offset = (unsigned long)skt->virt_io; + skt->virt_io = ioremap(skt->res_io.start, 0x10000); + if (skt->virt_io == NULL) { + ret = -ENOMEM; + goto out_err_5; + } - skt->status = soc_common_pcmcia_skt_state(skt); + mutex_lock(&soc_pcmcia_sockets_lock); - ret = pcmcia_register_socket(&skt->socket); - if (ret) - goto out_err_7; + list_add(&skt->node, &soc_pcmcia_sockets); - WARN_ON(skt->socket.sock != i); + /* + * We initialize default socket timing here, because + * we are not guaranteed to see a SetIOMap operation at + * runtime. + */ + skt->ops->set_timing(skt); - add_timer(&skt->poll_timer); + ret = skt->ops->hw_init(skt); + if (ret) + goto out_err_6; - ret = device_create_file(&skt->socket.dev, &dev_attr_status); - if (ret) - goto out_err_8; - } + skt->socket.ops = &soc_common_pcmcia_operations; + skt->socket.features = SS_CAP_STATIC_MAP|SS_CAP_PCCARD; + skt->socket.resource_ops = &pccard_static_ops; + skt->socket.irq_mask = 0; + skt->socket.map_size = PAGE_SIZE; + skt->socket.pci_irq = skt->irq; + skt->socket.io_offset = (unsigned long)skt->virt_io; - dev_set_drvdata(dev, sinfo); - ret = 0; - goto out; + skt->status = soc_common_pcmcia_skt_state(skt); - do { - skt = &sinfo->skt[i]; + ret = pcmcia_register_socket(&skt->socket); + if (ret) + goto out_err_7; + + add_timer(&skt->poll_timer); + + mutex_unlock(&soc_pcmcia_sockets_lock); + + ret = device_create_file(&skt->socket.dev, &dev_attr_status); + if (ret) + goto out_err_8; + + return ret; - device_remove_file(&skt->socket.dev, &dev_attr_status); out_err_8: - del_timer_sync(&skt->poll_timer); - pcmcia_unregister_socket(&skt->socket); + mutex_lock(&soc_pcmcia_sockets_lock); + del_timer_sync(&skt->poll_timer); + pcmcia_unregister_socket(&skt->socket); out_err_7: - flush_scheduled_work(); + flush_scheduled_work(); - ops->hw_shutdown(skt); + skt->ops->hw_shutdown(skt); out_err_6: - list_del(&skt->node); - iounmap(skt->virt_io); + list_del(&skt->node); + mutex_unlock(&soc_pcmcia_sockets_lock); + iounmap(skt->virt_io); out_err_5: - release_resource(&skt->res_attr); + release_resource(&skt->res_attr); out_err_4: - release_resource(&skt->res_mem); + release_resource(&skt->res_mem); out_err_3: - release_resource(&skt->res_io); + release_resource(&skt->res_io); out_err_2: - release_resource(&skt->res_skt); + release_resource(&skt->res_skt); out_err_1: - i--; - } while (i > 0); - kfree(sinfo); - - out: - mutex_unlock(&soc_pcmcia_sockets_lock); return ret; } -EXPORT_SYMBOL(soc_common_drv_pcmcia_probe); +EXPORT_SYMBOL(soc_pcmcia_add_one); -int soc_common_drv_pcmcia_remove(struct device *dev) +int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, + struct skt_dev_info *sinfo) { - struct skt_dev_info *sinfo = dev_get_drvdata(dev); - int i; + struct soc_pcmcia_socket *skt; + int ret, i; - dev_set_drvdata(dev, NULL); + /* + * Initialise the per-socket structure. + */ + for (i = ret = 0; i < sinfo->nskt; i++) { + skt = &sinfo->skt[i]; - mutex_lock(&soc_pcmcia_sockets_lock); - for (i = 0; i < sinfo->nskt; i++) { - struct soc_pcmcia_socket *skt = &sinfo->skt[i]; + skt->socket.owner = ops->owner; + skt->socket.dev.parent = dev; - del_timer_sync(&skt->poll_timer); + skt->dev = dev; + skt->ops = ops; - pcmcia_unregister_socket(&skt->socket); + ret = soc_pcmcia_add_one(skt); + if (ret) + break; - flush_scheduled_work(); + WARN_ON(skt->socket.sock != i); + } - skt->ops->hw_shutdown(skt); + if (ret) { + while (--i >= 0) + soc_pcmcia_remove_one(&sinfo->skt[i]); + kfree(sinfo); + } else { + dev_set_drvdata(dev, sinfo); + } - soc_common_pcmcia_config_skt(skt, &dead_socket); + return ret; +} +EXPORT_SYMBOL(soc_common_drv_pcmcia_probe); - list_del(&skt->node); - iounmap(skt->virt_io); - skt->virt_io = NULL; - release_resource(&skt->res_attr); - release_resource(&skt->res_mem); - release_resource(&skt->res_io); - release_resource(&skt->res_skt); - } - mutex_unlock(&soc_pcmcia_sockets_lock); +int soc_common_drv_pcmcia_remove(struct device *dev) +{ + struct skt_dev_info *sinfo = dev_get_drvdata(dev); + int i; + + dev_set_drvdata(dev, NULL); + + for (i = 0; i < sinfo->nskt; i++) + soc_pcmcia_remove_one(&sinfo->skt[i]); kfree(sinfo); diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h index 290e143..51c72ba 100644 --- a/drivers/pcmcia/soc_common.h +++ b/drivers/pcmcia/soc_common.h @@ -135,7 +135,8 @@ extern void soc_pcmcia_enable_irqs(struct soc_pcmcia_socket *skt, struct pcmcia_ extern void soc_common_pcmcia_get_timing(struct soc_pcmcia_socket *, struct soc_pcmcia_timing *); -extern struct list_head soc_pcmcia_sockets; +void soc_pcmcia_remove_one(struct soc_pcmcia_socket *skt); +int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt); extern int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, struct skt_dev_info *sinfo); extern int soc_common_drv_pcmcia_remove(struct device *dev); -- cgit v0.10.2 From be85458edce0f165cff62622f5e73b1d17b1e228 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Thu, 26 Mar 2009 22:21:18 +0000 Subject: PCMCIA: soc_common: push socket removal down to SoC specific support Mechanically transplant the removal code from soc_common into each SoC specific base support file, thereby allowing soc_common_drv_pcmcia_remove to be removed. No other changes. Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index 0e35acb..8a91106 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -297,7 +297,16 @@ static int pxa2xx_drv_pcmcia_probe(struct platform_device *dev) static int pxa2xx_drv_pcmcia_remove(struct platform_device *dev) { - return soc_common_drv_pcmcia_remove(&dev->dev); + struct skt_dev_info *sinfo = platform_get_drvdata(dev); + int i; + + platform_set_drvdata(dev, NULL); + + for (i = 0; i < sinfo->nskt; i++) + soc_pcmcia_remove_one(&sinfo->skt[i]); + + kfree(sinfo); + return 0; } static int pxa2xx_drv_pcmcia_suspend(struct device *dev) diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c index 2d0e997..11cc3ba 100644 --- a/drivers/pcmcia/sa1100_generic.c +++ b/drivers/pcmcia/sa1100_generic.c @@ -83,7 +83,16 @@ static int sa11x0_drv_pcmcia_probe(struct platform_device *dev) static int sa11x0_drv_pcmcia_remove(struct platform_device *dev) { - return soc_common_drv_pcmcia_remove(&dev->dev); + struct skt_dev_info *sinfo = platform_get_drvdata(dev); + int i; + + platform_set_drvdata(dev, NULL); + + for (i = 0; i < sinfo->nskt; i++) + soc_pcmcia_remove_one(&sinfo->skt[i]); + + kfree(sinfo); + return 0; } static int sa11x0_drv_pcmcia_suspend(struct platform_device *dev, diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index 4be4e17..a6793e3 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -152,7 +152,15 @@ static int pcmcia_probe(struct sa1111_dev *dev) static int __devexit pcmcia_remove(struct sa1111_dev *dev) { - soc_common_drv_pcmcia_remove(&dev->dev); + struct skt_dev_info *sinfo = dev_get_drvdata(&dev->dev); + int i; + + dev_set_drvdata(&dev->dev, NULL); + + for (i = 0; i < sinfo->nskt; i++) + soc_pcmcia_remove_one(&sinfo->skt[i]); + + kfree(sinfo); release_mem_region(dev->res.start, 512); return 0; } diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index 9bfa74a..fb5377d 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -778,22 +778,6 @@ int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops } EXPORT_SYMBOL(soc_common_drv_pcmcia_probe); -int soc_common_drv_pcmcia_remove(struct device *dev) -{ - struct skt_dev_info *sinfo = dev_get_drvdata(dev); - int i; - - dev_set_drvdata(dev, NULL); - - for (i = 0; i < sinfo->nskt; i++) - soc_pcmcia_remove_one(&sinfo->skt[i]); - - kfree(sinfo); - - return 0; -} -EXPORT_SYMBOL(soc_common_drv_pcmcia_remove); - MODULE_AUTHOR("John Dorsey "); MODULE_DESCRIPTION("Linux PCMCIA Card Services: Common SoC support"); MODULE_LICENSE("Dual MPL/GPL"); diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h index 51c72ba..c33b8c3 100644 --- a/drivers/pcmcia/soc_common.h +++ b/drivers/pcmcia/soc_common.h @@ -139,7 +139,6 @@ void soc_pcmcia_remove_one(struct soc_pcmcia_socket *skt); int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt); extern int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, struct skt_dev_info *sinfo); -extern int soc_common_drv_pcmcia_remove(struct device *dev); #ifdef CONFIG_PCMCIA_DEBUG -- cgit v0.10.2 From da4f007375197d6683461b995d404b01a7fdf2f5 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 19:23:42 +0100 Subject: PCMCIA: soc_common: push socket probe down into SoC specific support Move the individual socket probing and initialization down into the SoC specific support files, thereby allowing soc_common_drv_pcmcia_probe to be eliminated. soc_common.c now no longer deals with distinct groups of sockets. Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index 8a91106..3cb4fd2 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -228,6 +228,31 @@ static const char *skt_names[] = { #define SKT_DEV_INFO_SIZE(n) \ (sizeof(struct skt_dev_info) + (n)*sizeof(struct soc_pcmcia_socket)) +static int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) +{ + skt->res_skt.start = _PCMCIA(skt->nr); + skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; + skt->res_skt.name = skt_names[skt->nr]; + skt->res_skt.flags = IORESOURCE_MEM; + + skt->res_io.start = _PCMCIAIO(skt->nr); + skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; + skt->res_io.name = "io"; + skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + skt->res_mem.start = _PCMCIAMem(skt->nr); + skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; + skt->res_mem.name = "memory"; + skt->res_mem.flags = IORESOURCE_MEM; + + skt->res_attr.start = _PCMCIAAttr(skt->nr); + skt->res_attr.end = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; + skt->res_attr.name = "attribute"; + skt->res_attr.flags = IORESOURCE_MEM; + + return soc_pcmcia_add_one(skt); +} + int __pxa2xx_drv_pcmcia_probe(struct device *dev) { int i, ret; @@ -240,6 +265,12 @@ int __pxa2xx_drv_pcmcia_probe(struct device *dev) ops = (struct pcmcia_low_level *)dev->platform_data; + /* Provide our PXA2xx specific timing routines. */ + ops->set_timing = pxa2xx_pcmcia_set_timing; +#ifdef CONFIG_CPU_FREQ + ops->frequency_change = pxa2xx_pcmcia_frequency_change; +#endif + sinfo = kzalloc(SKT_DEV_INFO_SIZE(ops->nr), GFP_KERNEL); if (!sinfo) return -ENOMEM; @@ -250,40 +281,26 @@ int __pxa2xx_drv_pcmcia_probe(struct device *dev) for (i = 0; i < ops->nr; i++) { skt = &sinfo->skt[i]; - skt->nr = ops->first + i; - skt->irq = NO_IRQ; - - skt->res_skt.start = _PCMCIA(skt->nr); - skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; - skt->res_skt.name = skt_names[skt->nr]; - skt->res_skt.flags = IORESOURCE_MEM; + skt->nr = ops->first + i; + skt->irq = NO_IRQ; + skt->dev = dev; + skt->ops = ops; + skt->socket.owner = ops->owner; + skt->socket.dev.parent = dev; - skt->res_io.start = _PCMCIAIO(skt->nr); - skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; - skt->res_io.name = "io"; - skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; - - skt->res_mem.start = _PCMCIAMem(skt->nr); - skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; - skt->res_mem.name = "memory"; - skt->res_mem.flags = IORESOURCE_MEM; - - skt->res_attr.start = _PCMCIAAttr(skt->nr); - skt->res_attr.end = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; - skt->res_attr.name = "attribute"; - skt->res_attr.flags = IORESOURCE_MEM; + ret = pxa2xx_drv_pcmcia_add_one(skt); + if (ret) + break; } - /* Provide our PXA2xx specific timing routines. */ - ops->set_timing = pxa2xx_pcmcia_set_timing; -#ifdef CONFIG_CPU_FREQ - ops->frequency_change = pxa2xx_pcmcia_frequency_change; -#endif - - ret = soc_common_drv_pcmcia_probe(dev, ops, sinfo); - - if (!ret) + if (ret) { + while (--i >= 0) + soc_pcmcia_remove_one(&sinfo->skt[i]); + kfree(sinfo); + } else { pxa2xx_configure_sockets(dev); + dev_set_drvdata(dev, sinfo); + } return ret; } diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index e15d59f..92a4348 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -171,6 +171,31 @@ static const char *skt_names[] = { #define SKT_DEV_INFO_SIZE(n) \ (sizeof(struct skt_dev_info) + (n)*sizeof(struct soc_pcmcia_socket)) +static int sa11xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) +{ + skt->res_skt.start = _PCMCIA(skt->nr); + skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; + skt->res_skt.name = skt_names[skt->nr]; + skt->res_skt.flags = IORESOURCE_MEM; + + skt->res_io.start = _PCMCIAIO(skt->nr); + skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; + skt->res_io.name = "io"; + skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + skt->res_mem.start = _PCMCIAMem(skt->nr); + skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; + skt->res_mem.name = "memory"; + skt->res_mem.flags = IORESOURCE_MEM; + + skt->res_attr.start = _PCMCIAAttr(skt->nr); + skt->res_attr.end = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; + skt->res_attr.name = "attribute"; + skt->res_attr.flags = IORESOURCE_MEM; + + return soc_pcmcia_add_one(skt); +} + int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr) { @@ -178,40 +203,6 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, struct soc_pcmcia_socket *skt; int i; - sinfo = kzalloc(SKT_DEV_INFO_SIZE(nr), GFP_KERNEL); - if (!sinfo) - return -ENOMEM; - - sinfo->nskt = nr; - - /* Initiliaze processor specific parameters */ - for (i = 0; i < nr; i++) { - skt = &sinfo->skt[i]; - - skt->nr = first + i; - skt->irq = NO_IRQ; - - skt->res_skt.start = _PCMCIA(skt->nr); - skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; - skt->res_skt.name = skt_names[skt->nr]; - skt->res_skt.flags = IORESOURCE_MEM; - - skt->res_io.start = _PCMCIAIO(skt->nr); - skt->res_io.end = _PCMCIAIO(skt->nr) + PCMCIAIOSp - 1; - skt->res_io.name = "io"; - skt->res_io.flags = IORESOURCE_MEM | IORESOURCE_BUSY; - - skt->res_mem.start = _PCMCIAMem(skt->nr); - skt->res_mem.end = _PCMCIAMem(skt->nr) + PCMCIAMemSp - 1; - skt->res_mem.name = "memory"; - skt->res_mem.flags = IORESOURCE_MEM; - - skt->res_attr.start = _PCMCIAAttr(skt->nr); - skt->res_attr.end = _PCMCIAAttr(skt->nr) + PCMCIAAttrSp - 1; - skt->res_attr.name = "attribute"; - skt->res_attr.flags = IORESOURCE_MEM; - } - /* * set default MECR calculation if the board specific * code did not specify one... @@ -226,7 +217,37 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, ops->frequency_change = sa1100_pcmcia_frequency_change; #endif - return soc_common_drv_pcmcia_probe(dev, ops, sinfo); + sinfo = kzalloc(SKT_DEV_INFO_SIZE(nr), GFP_KERNEL); + if (!sinfo) + return -ENOMEM; + + sinfo->nskt = nr; + + /* Initiliaze processor specific parameters */ + for (i = 0; i < nr; i++) { + skt = &sinfo->skt[i]; + + skt->nr = first + i; + skt->irq = NO_IRQ; + skt->dev = dev; + skt->ops = ops; + skt->socket.owner = ops->owner; + skt->socket.dev.parent = dev; + + ret = sa11xx_drv_pcmcia_add_one(skt); + if (ret) + break; + } + + if (ret) { + while (--i >= 0) + soc_pcmcia_remove_one(&sinfo->skt[i]); + kfree(sinfo); + } else { + dev_set_drvdata(dev, sinfo); + } + + return ret; } EXPORT_SYMBOL(sa11xx_drv_pcmcia_probe); diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index fb5377d..6bc60f8 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -741,43 +741,6 @@ int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt) } EXPORT_SYMBOL(soc_pcmcia_add_one); -int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, - struct skt_dev_info *sinfo) -{ - struct soc_pcmcia_socket *skt; - int ret, i; - - /* - * Initialise the per-socket structure. - */ - for (i = ret = 0; i < sinfo->nskt; i++) { - skt = &sinfo->skt[i]; - - skt->socket.owner = ops->owner; - skt->socket.dev.parent = dev; - - skt->dev = dev; - skt->ops = ops; - - ret = soc_pcmcia_add_one(skt); - if (ret) - break; - - WARN_ON(skt->socket.sock != i); - } - - if (ret) { - while (--i >= 0) - soc_pcmcia_remove_one(&sinfo->skt[i]); - kfree(sinfo); - } else { - dev_set_drvdata(dev, sinfo); - } - - return ret; -} -EXPORT_SYMBOL(soc_common_drv_pcmcia_probe); - MODULE_AUTHOR("John Dorsey "); MODULE_DESCRIPTION("Linux PCMCIA Card Services: Common SoC support"); MODULE_LICENSE("Dual MPL/GPL"); diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h index c33b8c3..8a755c1 100644 --- a/drivers/pcmcia/soc_common.h +++ b/drivers/pcmcia/soc_common.h @@ -138,8 +138,6 @@ extern void soc_common_pcmcia_get_timing(struct soc_pcmcia_socket *, struct soc_ void soc_pcmcia_remove_one(struct soc_pcmcia_socket *skt); int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt); -extern int soc_common_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, struct skt_dev_info *sinfo); - #ifdef CONFIG_PCMCIA_DEBUG -- cgit v0.10.2 From 701a5dc05ad99a06958b3f97cb69d99b47cebee3 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 19:42:44 +0100 Subject: PCMCIA: sa1111: wrap soc_pcmcia_socket to contain sa1111 specific data Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index 3cb4fd2..c9c104b 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -228,7 +228,7 @@ static const char *skt_names[] = { #define SKT_DEV_INFO_SIZE(n) \ (sizeof(struct skt_dev_info) + (n)*sizeof(struct soc_pcmcia_socket)) -static int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) +int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) { skt->res_skt.start = _PCMCIA(skt->nr); skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; @@ -253,9 +253,18 @@ static int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) return soc_pcmcia_add_one(skt); } +void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops) +{ + /* Provide our PXA2xx specific timing routines. */ + ops->set_timing = pxa2xx_pcmcia_set_timing; +#ifdef CONFIG_CPU_FREQ + ops->frequency_change = pxa2xx_pcmcia_frequency_change; +#endif +} + int __pxa2xx_drv_pcmcia_probe(struct device *dev) { - int i, ret; + int i, ret = 0; struct pcmcia_low_level *ops; struct skt_dev_info *sinfo; struct soc_pcmcia_socket *skt; @@ -265,11 +274,7 @@ int __pxa2xx_drv_pcmcia_probe(struct device *dev) ops = (struct pcmcia_low_level *)dev->platform_data; - /* Provide our PXA2xx specific timing routines. */ - ops->set_timing = pxa2xx_pcmcia_set_timing; -#ifdef CONFIG_CPU_FREQ - ops->frequency_change = pxa2xx_pcmcia_frequency_change; -#endif + pxa2xx_drv_pcmcia_ops(ops); sinfo = kzalloc(SKT_DEV_INFO_SIZE(ops->nr), GFP_KERNEL); if (!sinfo) diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h index 235d681..cb5efae 100644 --- a/drivers/pcmcia/pxa2xx_base.h +++ b/drivers/pcmcia/pxa2xx_base.h @@ -1,3 +1,6 @@ /* temporary measure */ extern int __pxa2xx_drv_pcmcia_probe(struct device *); +int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); +void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); + diff --git a/drivers/pcmcia/pxa2xx_lubbock.c b/drivers/pcmcia/pxa2xx_lubbock.c index 6cbb1b1..35d5280 100644 --- a/drivers/pcmcia/pxa2xx_lubbock.c +++ b/drivers/pcmcia/pxa2xx_lubbock.c @@ -32,6 +32,7 @@ static int lubbock_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int pa_dwr_mask, pa_dwr_set, misc_mask, misc_set; int ret = 0; @@ -149,7 +150,7 @@ lubbock_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, if (ret == 0) { lubbock_set_misc_wr(misc_mask, misc_set); - sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set); + sa1111_set_io(s->dev, pa_dwr_mask, pa_dwr_set); } #if 1 @@ -175,7 +176,7 @@ lubbock_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, * Switch to 5V, Configure socket with 5V voltage */ lubbock_set_misc_wr(misc_mask, 0); - sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, 0); + sa1111_set_io(s->dev, pa_dwr_mask, 0); /* * It takes about 100ms to turn off Vcc. @@ -228,8 +229,9 @@ int pcmcia_lubbock_init(struct sa1111_dev *sadev) /* Set CF Socket 1 power to standby mode. */ lubbock_set_misc_wr((1 << 15) | (1 << 14), 0); - sadev->dev.platform_data = &lubbock_pcmcia_ops; - ret = __pxa2xx_drv_pcmcia_probe(&sadev->dev); + pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops); + ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops, + pxa2xx_drv_pcmcia_add_one); } return ret; diff --git a/drivers/pcmcia/sa1100_badge4.c b/drivers/pcmcia/sa1100_badge4.c index 1ca9737..6399314 100644 --- a/drivers/pcmcia/sa1100_badge4.c +++ b/drivers/pcmcia/sa1100_badge4.c @@ -134,6 +134,9 @@ static struct pcmcia_low_level badge4_pcmcia_ops = { .socket_init = sa1111_pcmcia_socket_init, .socket_suspend = sa1111_pcmcia_socket_suspend, + + .first = 0, + .nr = 2, }; int pcmcia_badge4_init(struct device *dev) @@ -146,7 +149,9 @@ int pcmcia_badge4_init(struct device *dev) __func__, badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc); - ret = sa11xx_drv_pcmcia_probe(dev, &badge4_pcmcia_ops, 0, 2); + sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops); + ret = sa1111_pcmcia_add(dev, &badge4_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } return ret; diff --git a/drivers/pcmcia/sa1100_jornada720.c b/drivers/pcmcia/sa1100_jornada720.c index 7eedb42..4a32f4f 100644 --- a/drivers/pcmcia/sa1100_jornada720.c +++ b/drivers/pcmcia/sa1100_jornada720.c @@ -24,6 +24,7 @@ static int jornada720_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; /* @@ -31,9 +32,9 @@ static int jornada720_pcmcia_hw_init(struct soc_pcmcia_socket *skt) */ GRER |= 0x00000002; /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ - sa1111_set_io_dir(SA1111_DEV(skt->dev), pin, 0, 0); - sa1111_set_io(SA1111_DEV(skt->dev), pin, 0); - sa1111_set_sleep_io(SA1111_DEV(skt->dev), pin, 0); + sa1111_set_io_dir(s->dev, pin, 0, 0); + sa1111_set_io(s->dev, pin, 0); + sa1111_set_sleep_io(s->dev, pin, 0); return sa1111_pcmcia_hw_init(skt); } @@ -41,6 +42,7 @@ static int jornada720_pcmcia_hw_init(struct soc_pcmcia_socket *skt) static int jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int pa_dwr_mask, pa_dwr_set; int ret; @@ -97,7 +99,7 @@ jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_s unsigned long flags; local_irq_save(flags); - sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set); + sa1111_set_io(s->dev, pa_dwr_mask, pa_dwr_set); local_irq_restore(flags); } @@ -113,14 +115,20 @@ static struct pcmcia_low_level jornada720_pcmcia_ops = { .socket_init = sa1111_pcmcia_socket_init, .socket_suspend = sa1111_pcmcia_socket_suspend, + + .first = 0, + .nr = 2, }; int __devinit pcmcia_jornada720_init(struct device *dev) { int ret = -ENODEV; - if (machine_is_jornada720()) - ret = sa11xx_drv_pcmcia_probe(dev, &jornada720_pcmcia_ops, 0, 2); + if (machine_is_jornada720()) { + sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); + ret = sa1111_pcmcia_add(dev, &jornada720_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); + } return ret; } diff --git a/drivers/pcmcia/sa1100_neponset.c b/drivers/pcmcia/sa1100_neponset.c index 0c76d33..e39c65a 100644 --- a/drivers/pcmcia/sa1100_neponset.c +++ b/drivers/pcmcia/sa1100_neponset.c @@ -43,6 +43,7 @@ static int neponset_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int ncr_mask, ncr_set, pa_dwr_mask, pa_dwr_set; int ret; @@ -99,7 +100,7 @@ neponset_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_sta NCR_0 = (NCR_0 & ~ncr_mask) | ncr_set; local_irq_restore(flags); - sa1111_set_io(SA1111_DEV(skt->dev), pa_dwr_mask, pa_dwr_set); + sa1111_set_io(s->dev, pa_dwr_mask, pa_dwr_set); } return 0; @@ -121,6 +122,8 @@ static struct pcmcia_low_level neponset_pcmcia_ops = { .configure_socket = neponset_pcmcia_configure_socket, .socket_init = neponset_pcmcia_socket_init, .socket_suspend = sa1111_pcmcia_socket_suspend, + .first = 0, + .nr = 2, }; int pcmcia_neponset_init(struct sa1111_dev *sadev) @@ -135,7 +138,9 @@ int pcmcia_neponset_init(struct sa1111_dev *sadev) sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0); sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0); - ret = sa11xx_drv_pcmcia_probe(&sadev->dev, &neponset_pcmcia_ops, 0, 2); + sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops); + ret = sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops, + sa11xx_drv_pcmcia_add_one); } return ret; diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index a6793e3..98c7915 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -30,9 +30,6 @@ static struct pcmcia_irqs irqs[] = { int sa1111_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - if (skt->irq == NO_IRQ) - skt->irq = skt->nr ? IRQ_S1_READY_NINT : IRQ_S0_READY_NINT; - return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } @@ -43,8 +40,8 @@ void sa1111_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *state) { - struct sa1111_dev *sadev = SA1111_DEV(skt->dev); - unsigned long status = sa1111_readl(sadev->mapbase + SA1111_PCSR); + struct sa1111_pcmcia_socket *s = to_skt(skt); + unsigned long status = sa1111_readl(s->dev->mapbase + SA1111_PCSR); switch (skt->nr) { case 0: @@ -71,7 +68,7 @@ void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_sta int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { - struct sa1111_dev *sadev = SA1111_DEV(skt->dev); + struct sa1111_pcmcia_socket *s = to_skt(skt); unsigned int pccr_skt_mask, pccr_set_mask, val; unsigned long flags; @@ -100,10 +97,10 @@ int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_s pccr_set_mask |= PCCR_S0_FLT|PCCR_S1_FLT; local_irq_save(flags); - val = sa1111_readl(sadev->mapbase + SA1111_PCCR); + val = sa1111_readl(s->dev->mapbase + SA1111_PCCR); val &= ~pccr_skt_mask; val |= pccr_set_mask & pccr_skt_mask; - sa1111_writel(val, sadev->mapbase + SA1111_PCCR); + sa1111_writel(val, s->dev->mapbase + SA1111_PCCR); local_irq_restore(flags); return 0; @@ -119,10 +116,45 @@ void sa1111_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) soc_pcmcia_disable_irqs(skt, irqs, ARRAY_SIZE(irqs)); } +int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, + int (*add)(struct soc_pcmcia_socket *)) +{ + struct sa1111_pcmcia_socket *s; + int i, ret = 0; + + s = kzalloc(sizeof(*s) * ops->nr, GFP_KERNEL); + if (!s) + return -ENODEV; + + for (i = 0; i < ops->nr; i++) { + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + s->soc.nr = ops->first + i; + s->soc.irq = s->soc.nr ? IRQ_S1_READY_NINT : IRQ_S0_READY_NINT; + s->soc.ops = ops; + s->soc.socket.owner = ops->owner; + s->soc.socket.dev.parent = &dev->dev; + s->dev = dev; + + ret = add(&s->soc); + if (ret == 0) { + s->next = dev_get_drvdata(&dev->dev); + dev_set_drvdata(&dev->dev, s); + } else + kfree(s); + } + + return ret; +} + static int pcmcia_probe(struct sa1111_dev *dev) { void __iomem *base; + dev_set_drvdata(&dev->dev, NULL); + if (!request_mem_region(dev->res.start, 512, SA1111_DRIVER_NAME(dev))) return -EBUSY; @@ -152,15 +184,15 @@ static int pcmcia_probe(struct sa1111_dev *dev) static int __devexit pcmcia_remove(struct sa1111_dev *dev) { - struct skt_dev_info *sinfo = dev_get_drvdata(&dev->dev); - int i; + struct sa1111_pcmcia_socket *next, *s = dev_get_drvdata(&dev->dev); dev_set_drvdata(&dev->dev, NULL); - for (i = 0; i < sinfo->nskt; i++) - soc_pcmcia_remove_one(&sinfo->skt[i]); + for (; next = s->next, s; s = next) { + soc_pcmcia_remove_one(&s->soc); + kfree(s); + } - kfree(sinfo); release_mem_region(dev->res.start, 512); return 0; } diff --git a/drivers/pcmcia/sa1111_generic.h b/drivers/pcmcia/sa1111_generic.h index 10ced4a..536fe15 100644 --- a/drivers/pcmcia/sa1111_generic.h +++ b/drivers/pcmcia/sa1111_generic.h @@ -1,6 +1,20 @@ #include "soc_common.h" #include "sa11xx_base.h" +struct sa1111_pcmcia_socket { + struct soc_pcmcia_socket soc; + struct sa1111_dev *dev; + struct sa1111_pcmcia_socket *next; +}; + +static inline struct sa1111_pcmcia_socket *to_skt(struct soc_pcmcia_socket *s) +{ + return container_of(s, struct sa1111_pcmcia_socket, soc); +} + +int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, + int (*add)(struct soc_pcmcia_socket *)); + extern int sa1111_pcmcia_hw_init(struct soc_pcmcia_socket *); extern void sa1111_pcmcia_hw_shutdown(struct soc_pcmcia_socket *); extern void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *, struct pcmcia_state *); diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index 92a4348..4db8149 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -171,7 +171,7 @@ static const char *skt_names[] = { #define SKT_DEV_INFO_SIZE(n) \ (sizeof(struct skt_dev_info) + (n)*sizeof(struct soc_pcmcia_socket)) -static int sa11xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) +int sa11xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) { skt->res_skt.start = _PCMCIA(skt->nr); skt->res_skt.end = _PCMCIA(skt->nr) + PCMCIASp - 1; @@ -195,14 +195,10 @@ static int sa11xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt) return soc_pcmcia_add_one(skt); } +EXPORT_SYMBOL(sa11xx_drv_pcmcia_add_one); -int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, - int first, int nr) +void sa11xx_drv_pcmcia_ops(struct pcmcia_low_level *ops) { - struct skt_dev_info *sinfo; - struct soc_pcmcia_socket *skt; - int i; - /* * set default MECR calculation if the board specific * code did not specify one... @@ -216,6 +212,17 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, #ifdef CONFIG_CPU_FREQ ops->frequency_change = sa1100_pcmcia_frequency_change; #endif +} +EXPORT_SYMBOL(sa11xx_drv_pcmcia_ops); + +int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, + int first, int nr) +{ + struct skt_dev_info *sinfo; + struct soc_pcmcia_socket *skt; + int i, ret = 0; + + sa11xx_drv_pcmcia_ops(ops); sinfo = kzalloc(SKT_DEV_INFO_SIZE(nr), GFP_KERNEL); if (!sinfo) diff --git a/drivers/pcmcia/sa11xx_base.h b/drivers/pcmcia/sa11xx_base.h index 7bc2082..3d76d72 100644 --- a/drivers/pcmcia/sa11xx_base.h +++ b/drivers/pcmcia/sa11xx_base.h @@ -118,6 +118,8 @@ static inline unsigned int sa1100_pcmcia_cmd_time(unsigned int cpu_clock_khz, } +int sa11xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt); +void sa11xx_drv_pcmcia_ops(struct pcmcia_low_level *ops); extern int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, int first, int nr); #endif /* !defined(_PCMCIA_SA1100_H) */ -- cgit v0.10.2 From dabd14684bc2375bf69f227f04993a4dc2fd3a16 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 22:35:11 +0100 Subject: PCMCIA: sa1111: remove duplicated initializers Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/pxa2xx_lubbock.c b/drivers/pcmcia/pxa2xx_lubbock.c index 35d5280..b9f8c8f 100644 --- a/drivers/pcmcia/pxa2xx_lubbock.c +++ b/drivers/pcmcia/pxa2xx_lubbock.c @@ -201,12 +201,8 @@ lubbock_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, static struct pcmcia_low_level lubbock_pcmcia_ops = { .owner = THIS_MODULE, - .hw_init = sa1111_pcmcia_hw_init, - .hw_shutdown = sa1111_pcmcia_hw_shutdown, - .socket_state = sa1111_pcmcia_socket_state, .configure_socket = lubbock_pcmcia_configure_socket, .socket_init = sa1111_pcmcia_socket_init, - .socket_suspend = sa1111_pcmcia_socket_suspend, .first = 0, .nr = 2, }; diff --git a/drivers/pcmcia/sa1100_badge4.c b/drivers/pcmcia/sa1100_badge4.c index 6399314..1ce53f4 100644 --- a/drivers/pcmcia/sa1100_badge4.c +++ b/drivers/pcmcia/sa1100_badge4.c @@ -127,14 +127,8 @@ badge4_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state static struct pcmcia_low_level badge4_pcmcia_ops = { .owner = THIS_MODULE, - .hw_init = sa1111_pcmcia_hw_init, - .hw_shutdown = sa1111_pcmcia_hw_shutdown, - .socket_state = sa1111_pcmcia_socket_state, .configure_socket = badge4_pcmcia_configure_socket, - .socket_init = sa1111_pcmcia_socket_init, - .socket_suspend = sa1111_pcmcia_socket_suspend, - .first = 0, .nr = 2, }; diff --git a/drivers/pcmcia/sa1100_jornada720.c b/drivers/pcmcia/sa1100_jornada720.c index 4a32f4f..6bcabee 100644 --- a/drivers/pcmcia/sa1100_jornada720.c +++ b/drivers/pcmcia/sa1100_jornada720.c @@ -22,23 +22,6 @@ #define SOCKET1_POWER (GPIO_GPIO1 | GPIO_GPIO3) #define SOCKET1_3V GPIO_GPIO3 -static int jornada720_pcmcia_hw_init(struct soc_pcmcia_socket *skt) -{ - struct sa1111_pcmcia_socket *s = to_skt(skt); - unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; - - /* - * What is all this crap for? - */ - GRER |= 0x00000002; - /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ - sa1111_set_io_dir(s->dev, pin, 0, 0); - sa1111_set_io(s->dev, pin, 0); - sa1111_set_sleep_io(s->dev, pin, 0); - - return sa1111_pcmcia_hw_init(skt); -} - static int jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_t *state) { @@ -108,14 +91,8 @@ jornada720_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_s static struct pcmcia_low_level jornada720_pcmcia_ops = { .owner = THIS_MODULE, - .hw_init = jornada720_pcmcia_hw_init, - .hw_shutdown = sa1111_pcmcia_hw_shutdown, - .socket_state = sa1111_pcmcia_socket_state, .configure_socket = jornada720_pcmcia_configure_socket, - .socket_init = sa1111_pcmcia_socket_init, - .socket_suspend = sa1111_pcmcia_socket_suspend, - .first = 0, .nr = 2, }; @@ -125,6 +102,15 @@ int __devinit pcmcia_jornada720_init(struct device *dev) int ret = -ENODEV; if (machine_is_jornada720()) { + unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3; + + GRER |= 0x00000002; + + /* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */ + sa1111_set_io_dir(dev, pin, 0, 0); + sa1111_set_io(dev, pin, 0); + sa1111_set_sleep_io(dev, pin, 0); + sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops); ret = sa1111_pcmcia_add(dev, &jornada720_pcmcia_ops, sa11xx_drv_pcmcia_add_one); diff --git a/drivers/pcmcia/sa1100_neponset.c b/drivers/pcmcia/sa1100_neponset.c index e39c65a..c95639b 100644 --- a/drivers/pcmcia/sa1100_neponset.c +++ b/drivers/pcmcia/sa1100_neponset.c @@ -116,12 +116,8 @@ static void neponset_pcmcia_socket_init(struct soc_pcmcia_socket *skt) static struct pcmcia_low_level neponset_pcmcia_ops = { .owner = THIS_MODULE, - .hw_init = sa1111_pcmcia_hw_init, - .hw_shutdown = sa1111_pcmcia_hw_shutdown, - .socket_state = sa1111_pcmcia_socket_state, .configure_socket = neponset_pcmcia_configure_socket, .socket_init = neponset_pcmcia_socket_init, - .socket_suspend = sa1111_pcmcia_socket_suspend, .first = 0, .nr = 2, }; diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index 98c7915..219860a 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -28,12 +28,12 @@ static struct pcmcia_irqs irqs[] = { { 1, IRQ_S1_BVD1_STSCHG, "SA1111 CF BVD1" }, }; -int sa1111_pcmcia_hw_init(struct soc_pcmcia_socket *skt) +static int sa1111_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } -void sa1111_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) +static void sa1111_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) { soc_pcmcia_free_irqs(skt, irqs, ARRAY_SIZE(irqs)); } @@ -111,7 +111,7 @@ void sa1111_pcmcia_socket_init(struct soc_pcmcia_socket *skt) soc_pcmcia_enable_irqs(skt, irqs, ARRAY_SIZE(irqs)); } -void sa1111_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) +static void sa1111_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) { soc_pcmcia_disable_irqs(skt, irqs, ARRAY_SIZE(irqs)); } @@ -122,6 +122,11 @@ int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, struct sa1111_pcmcia_socket *s; int i, ret = 0; + ops->hw_init = sa1111_pcmcia_hw_init; + ops->hw_shutdown = sa1111_pcmcia_hw_shutdown; + ops->socket_state = sa1111_pcmcia_socket_state; + ops->socket_suspend = sa1111_pcmcia_socket_suspend; + s = kzalloc(sizeof(*s) * ops->nr, GFP_KERNEL); if (!s) return -ENODEV; diff --git a/drivers/pcmcia/sa1111_generic.h b/drivers/pcmcia/sa1111_generic.h index 536fe15..02dc857 100644 --- a/drivers/pcmcia/sa1111_generic.h +++ b/drivers/pcmcia/sa1111_generic.h @@ -15,12 +15,9 @@ static inline struct sa1111_pcmcia_socket *to_skt(struct soc_pcmcia_socket *s) int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, int (*add)(struct soc_pcmcia_socket *)); -extern int sa1111_pcmcia_hw_init(struct soc_pcmcia_socket *); -extern void sa1111_pcmcia_hw_shutdown(struct soc_pcmcia_socket *); extern void sa1111_pcmcia_socket_state(struct soc_pcmcia_socket *, struct pcmcia_state *); extern int sa1111_pcmcia_configure_socket(struct soc_pcmcia_socket *, const socket_state_t *); extern void sa1111_pcmcia_socket_init(struct soc_pcmcia_socket *); -extern void sa1111_pcmcia_socket_suspend(struct soc_pcmcia_socket *); extern int pcmcia_badge4_init(struct device *); extern int pcmcia_jornada720_init(struct device *); -- cgit v0.10.2 From b62d99b5028b6df1c32b864fd9dd32ad6b42d396 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 22:14:32 +0100 Subject: PCMCIA: soc_common: constify soc_pcmcia_socket ops member No one should modify the ops structure supplied to soc_pcmcia_socket so make it const. Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h index 8a755c1..f7533dc 100644 --- a/drivers/pcmcia/soc_common.h +++ b/drivers/pcmcia/soc_common.h @@ -37,7 +37,7 @@ struct soc_pcmcia_socket { /* * Core PCMCIA state */ - struct pcmcia_low_level *ops; + const struct pcmcia_low_level *ops; unsigned int status; socket_state_t cs_state; -- cgit v0.10.2 From f397b9c5dcc30a575973b2e4f0a602fc85b38853 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 22:12:34 +0100 Subject: PCMCIA: soc_common: remove 'dev' member from soc_pcmcia_socket The 'dev' member is now only ever written, so we can safely remove it. Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index c9c104b..7f61b62 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -288,7 +288,6 @@ int __pxa2xx_drv_pcmcia_probe(struct device *dev) skt->nr = ops->first + i; skt->irq = NO_IRQ; - skt->dev = dev; skt->ops = ops; skt->socket.owner = ops->owner; skt->socket.dev.parent = dev; diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index 4db8149..ac99872 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -236,7 +236,6 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, skt->nr = first + i; skt->irq = NO_IRQ; - skt->dev = dev; skt->ops = ops; skt->socket.owner = ops->owner; skt->socket.dev.parent = dev; diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h index f7533dc..4fb06f7 100644 --- a/drivers/pcmcia/soc_common.h +++ b/drivers/pcmcia/soc_common.h @@ -30,7 +30,6 @@ struct soc_pcmcia_socket { /* * Info from low level handler */ - struct device *dev; unsigned int nr; unsigned int irq; -- cgit v0.10.2 From 1689164a272a962572a1f31af715dfe462cf7910 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 22:43:43 +0100 Subject: PCMCIA: ss: allow PCI IRQs > 255 Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index d696a69..753da9b 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -172,7 +172,7 @@ struct pcmcia_socket { u_int irq_mask; u_int map_size; u_int io_offset; - u_char pci_irq; + u_int pci_irq; struct pci_dev * cb_dev; -- cgit v0.10.2 From 66024db57d5b9011e274b314affad68f370c0d6f Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Sun, 29 Mar 2009 22:45:26 +0100 Subject: PCMCIA: stop duplicating pci_irq in soc_pcmcia_socket skt->irq is a mere duplication of pcmcia_socket's pci_irq member. Get rid of it. Signed-off-by: Russell King Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c index 7f61b62..84dde776 100644 --- a/drivers/pcmcia/pxa2xx_base.c +++ b/drivers/pcmcia/pxa2xx_base.c @@ -287,10 +287,10 @@ int __pxa2xx_drv_pcmcia_probe(struct device *dev) skt = &sinfo->skt[i]; skt->nr = ops->first + i; - skt->irq = NO_IRQ; skt->ops = ops; skt->socket.owner = ops->owner; skt->socket.dev.parent = dev; + skt->socket.pci_irq = NO_IRQ; ret = pxa2xx_drv_pcmcia_add_one(skt); if (ret) diff --git a/drivers/pcmcia/pxa2xx_cm_x255.c b/drivers/pcmcia/pxa2xx_cm_x255.c index 5143a76..05913d0 100644 --- a/drivers/pcmcia/pxa2xx_cm_x255.c +++ b/drivers/pcmcia/pxa2xx_cm_x255.c @@ -44,7 +44,7 @@ static int cmx255_pcmcia_hw_init(struct soc_pcmcia_socket *skt) return ret; gpio_direction_output(GPIO_PCMCIA_RESET, 0); - skt->irq = skt->nr == 0 ? PCMCIA_S0_RDYINT : PCMCIA_S1_RDYINT; + skt->socket.pci_irq = skt->nr == 0 ? PCMCIA_S0_RDYINT : PCMCIA_S1_RDYINT; ret = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); if (!ret) gpio_free(GPIO_PCMCIA_RESET); diff --git a/drivers/pcmcia/pxa2xx_cm_x270.c b/drivers/pcmcia/pxa2xx_cm_x270.c index a7b943d..5662646 100644 --- a/drivers/pcmcia/pxa2xx_cm_x270.c +++ b/drivers/pcmcia/pxa2xx_cm_x270.c @@ -38,7 +38,7 @@ static int cmx270_pcmcia_hw_init(struct soc_pcmcia_socket *skt) return ret; gpio_direction_output(GPIO_PCMCIA_RESET, 0); - skt->irq = PCMCIA_S0_RDYINT; + skt->socket.pci_irq = PCMCIA_S0_RDYINT; ret = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); if (!ret) gpio_free(GPIO_PCMCIA_RESET); diff --git a/drivers/pcmcia/pxa2xx_e740.c b/drivers/pcmcia/pxa2xx_e740.c index d09c0dc..8bfbd4d 100644 --- a/drivers/pcmcia/pxa2xx_e740.c +++ b/drivers/pcmcia/pxa2xx_e740.c @@ -38,7 +38,7 @@ static struct pcmcia_irqs cd_irqs[] = { static int e740_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->irq = skt->nr == 0 ? IRQ_GPIO(GPIO_E740_PCMCIA_RDY0) : + skt->socket.pci_irq = skt->nr == 0 ? IRQ_GPIO(GPIO_E740_PCMCIA_RDY0) : IRQ_GPIO(GPIO_E740_PCMCIA_RDY1); return soc_pcmcia_request_irqs(skt, &cd_irqs[skt->nr], 1); diff --git a/drivers/pcmcia/pxa2xx_mainstone.c b/drivers/pcmcia/pxa2xx_mainstone.c index 1138551..92016fe 100644 --- a/drivers/pcmcia/pxa2xx_mainstone.c +++ b/drivers/pcmcia/pxa2xx_mainstone.c @@ -44,7 +44,7 @@ static int mst_pcmcia_hw_init(struct soc_pcmcia_socket *skt) * before we enable them as outputs. */ - skt->irq = (skt->nr == 0) ? MAINSTONE_S0_IRQ : MAINSTONE_S1_IRQ; + skt->socket.pci_irq = (skt->nr == 0) ? MAINSTONE_S0_IRQ : MAINSTONE_S1_IRQ; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/pxa2xx_palmld.c b/drivers/pcmcia/pxa2xx_palmld.c index 5ba9b36..6fb6f7f 100644 --- a/drivers/pcmcia/pxa2xx_palmld.c +++ b/drivers/pcmcia/pxa2xx_palmld.c @@ -45,7 +45,7 @@ static int palmld_pcmcia_hw_init(struct soc_pcmcia_socket *skt) if (ret) goto err4; - skt->irq = IRQ_GPIO(GPIO_NR_PALMLD_PCMCIA_READY); + skt->socket.pci_irq = IRQ_GPIO(GPIO_NR_PALMLD_PCMCIA_READY); return 0; err4: diff --git a/drivers/pcmcia/pxa2xx_palmtx.c b/drivers/pcmcia/pxa2xx_palmtx.c index e07b5c5..b07b247 100644 --- a/drivers/pcmcia/pxa2xx_palmtx.c +++ b/drivers/pcmcia/pxa2xx_palmtx.c @@ -53,7 +53,7 @@ static int palmtx_pcmcia_hw_init(struct soc_pcmcia_socket *skt) if (ret) goto err5; - skt->irq = gpio_to_irq(GPIO_NR_PALMTX_PCMCIA_READY); + skt->socket.pci_irq = gpio_to_irq(GPIO_NR_PALMTX_PCMCIA_READY); return 0; err5: diff --git a/drivers/pcmcia/pxa2xx_sharpsl.c b/drivers/pcmcia/pxa2xx_sharpsl.c index bc43f78..0ea3b29 100644 --- a/drivers/pcmcia/pxa2xx_sharpsl.c +++ b/drivers/pcmcia/pxa2xx_sharpsl.c @@ -66,7 +66,7 @@ static int sharpsl_pcmcia_hw_init(struct soc_pcmcia_socket *skt) } } - skt->irq = SCOOP_DEV[skt->nr].irq; + skt->socket.pci_irq = SCOOP_DEV[skt->nr].irq; return 0; } diff --git a/drivers/pcmcia/pxa2xx_trizeps4.c b/drivers/pcmcia/pxa2xx_trizeps4.c index e0e5cb3..b7e5966 100644 --- a/drivers/pcmcia/pxa2xx_trizeps4.c +++ b/drivers/pcmcia/pxa2xx_trizeps4.c @@ -53,7 +53,7 @@ static int trizeps_pcmcia_hw_init(struct soc_pcmcia_socket *skt) gpio_free(GPIO_PRDY); return -EINVAL; } - skt->irq = IRQ_GPIO(GPIO_PRDY); + skt->socket.pci_irq = IRQ_GPIO(GPIO_PRDY); break; #ifndef CONFIG_MACH_TRIZEPS_CONXS @@ -63,7 +63,7 @@ static int trizeps_pcmcia_hw_init(struct soc_pcmcia_socket *skt) break; } /* release the reset of this card */ - pr_debug("%s: sock %d irq %d\n", __func__, skt->nr, skt->irq); + pr_debug("%s: sock %d irq %d\n", __func__, skt->nr, skt->socket.pci_irq); /* supplementory irqs for the socket */ for (i = 0; i < ARRAY_SIZE(irqs); i++) { diff --git a/drivers/pcmcia/pxa2xx_viper.c b/drivers/pcmcia/pxa2xx_viper.c index 1787136..27be2e15 100644 --- a/drivers/pcmcia/pxa2xx_viper.c +++ b/drivers/pcmcia/pxa2xx_viper.c @@ -40,7 +40,7 @@ static int viper_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { unsigned long flags; - skt->irq = gpio_to_irq(VIPER_CF_RDY_GPIO); + skt->socket.pci_irq = gpio_to_irq(VIPER_CF_RDY_GPIO); if (gpio_request(VIPER_CF_CD_GPIO, "CF detect")) goto err_request_cd; diff --git a/drivers/pcmcia/sa1100_assabet.c b/drivers/pcmcia/sa1100_assabet.c index ac8aa09..fd013a1 100644 --- a/drivers/pcmcia/sa1100_assabet.c +++ b/drivers/pcmcia/sa1100_assabet.c @@ -27,7 +27,7 @@ static struct pcmcia_irqs irqs[] = { static int assabet_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->irq = ASSABET_IRQ_GPIO_CF_IRQ; + skt->socket.pci_irq = ASSABET_IRQ_GPIO_CF_IRQ; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/sa1100_cerf.c b/drivers/pcmcia/sa1100_cerf.c index 63e6bc4..9bf088b 100644 --- a/drivers/pcmcia/sa1100_cerf.c +++ b/drivers/pcmcia/sa1100_cerf.c @@ -27,7 +27,7 @@ static struct pcmcia_irqs irqs[] = { static int cerf_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->irq = CERF_IRQ_GPIO_CF_IRQ; + skt->socket.pci_irq = CERF_IRQ_GPIO_CF_IRQ; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c index 0cc3748..3a121ac69 100644 --- a/drivers/pcmcia/sa1100_h3600.c +++ b/drivers/pcmcia/sa1100_h3600.c @@ -25,8 +25,8 @@ static struct pcmcia_irqs irqs[] = { static int h3600_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->irq = skt->nr ? IRQ_GPIO_H3600_PCMCIA_IRQ1 - : IRQ_GPIO_H3600_PCMCIA_IRQ0; + skt->socket.pci_irq = skt->nr ? IRQ_GPIO_H3600_PCMCIA_IRQ1 + : IRQ_GPIO_H3600_PCMCIA_IRQ0; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); diff --git a/drivers/pcmcia/sa1100_shannon.c b/drivers/pcmcia/sa1100_shannon.c index 46d8c19..c4d5186 100644 --- a/drivers/pcmcia/sa1100_shannon.c +++ b/drivers/pcmcia/sa1100_shannon.c @@ -28,7 +28,7 @@ static int shannon_pcmcia_hw_init(struct soc_pcmcia_socket *skt) GAFR &= ~(SHANNON_GPIO_EJECT_0 | SHANNON_GPIO_EJECT_1 | SHANNON_GPIO_RDY_0 | SHANNON_GPIO_RDY_1); - skt->irq = skt->nr ? SHANNON_IRQ_GPIO_RDY_1 : SHANNON_IRQ_GPIO_RDY_0; + skt->socket.pci_irq = skt->nr ? SHANNON_IRQ_GPIO_RDY_1 : SHANNON_IRQ_GPIO_RDY_0; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/sa1100_simpad.c b/drivers/pcmcia/sa1100_simpad.c index 33a08ae..05bd504 100644 --- a/drivers/pcmcia/sa1100_simpad.c +++ b/drivers/pcmcia/sa1100_simpad.c @@ -28,7 +28,7 @@ static int simpad_pcmcia_hw_init(struct soc_pcmcia_socket *skt) clear_cs3_bit(VCC_3V_EN|VCC_5V_EN|EN0|EN1); - skt->irq = IRQ_GPIO_CF_IRQ; + skt->socket.pci_irq = IRQ_GPIO_CF_IRQ; return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); } diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index 219860a..deb5036 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -137,10 +137,10 @@ int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, return -ENOMEM; s->soc.nr = ops->first + i; - s->soc.irq = s->soc.nr ? IRQ_S1_READY_NINT : IRQ_S0_READY_NINT; s->soc.ops = ops; s->soc.socket.owner = ops->owner; s->soc.socket.dev.parent = &dev->dev; + s->soc.socket.pci_irq = s->soc.nr ? IRQ_S1_READY_NINT : IRQ_S0_READY_NINT; s->dev = dev; ret = add(&s->soc); diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c index ac99872..fc9a652 100644 --- a/drivers/pcmcia/sa11xx_base.c +++ b/drivers/pcmcia/sa11xx_base.c @@ -235,10 +235,10 @@ int sa11xx_drv_pcmcia_probe(struct device *dev, struct pcmcia_low_level *ops, skt = &sinfo->skt[i]; skt->nr = first + i; - skt->irq = NO_IRQ; skt->ops = ops; skt->socket.owner = ops->owner; skt->socket.dev.parent = dev; + skt->socket.pci_irq = NO_IRQ; ret = sa11xx_drv_pcmcia_add_one(skt); if (ret) diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c index 6bc60f8..6f1a86b 100644 --- a/drivers/pcmcia/soc_common.c +++ b/drivers/pcmcia/soc_common.c @@ -144,10 +144,10 @@ soc_common_pcmcia_config_skt(struct soc_pcmcia_socket *skt, socket_state_t *stat */ if (skt->irq_state != 1 && state->io_irq) { skt->irq_state = 1; - set_irq_type(skt->irq, IRQ_TYPE_EDGE_FALLING); + set_irq_type(skt->socket.pci_irq, IRQ_TYPE_EDGE_FALLING); } else if (skt->irq_state == 1 && state->io_irq == 0) { skt->irq_state = 0; - set_irq_type(skt->irq, IRQ_TYPE_NONE); + set_irq_type(skt->socket.pci_irq, IRQ_TYPE_NONE); } skt->cs_state = *state; @@ -492,7 +492,8 @@ static ssize_t show_status(struct device *dev, struct device_attribute *attr, ch p+=sprintf(p, "Vcc : %d\n", skt->cs_state.Vcc); p+=sprintf(p, "Vpp : %d\n", skt->cs_state.Vpp); - p+=sprintf(p, "IRQ : %d (%d)\n", skt->cs_state.io_irq, skt->irq); + p+=sprintf(p, "IRQ : %d (%d)\n", skt->cs_state.io_irq, + skt->socket.pci_irq); if (skt->ops->show_timing) p+=skt->ops->show_timing(skt, p); @@ -695,7 +696,6 @@ int soc_pcmcia_add_one(struct soc_pcmcia_socket *skt) skt->socket.resource_ops = &pccard_static_ops; skt->socket.irq_mask = 0; skt->socket.map_size = PAGE_SIZE; - skt->socket.pci_irq = skt->irq; skt->socket.io_offset = (unsigned long)skt->virt_io; skt->status = soc_common_pcmcia_skt_state(skt); diff --git a/drivers/pcmcia/soc_common.h b/drivers/pcmcia/soc_common.h index 4fb06f7..e40824c 100644 --- a/drivers/pcmcia/soc_common.h +++ b/drivers/pcmcia/soc_common.h @@ -31,7 +31,6 @@ struct soc_pcmcia_socket { * Info from low level handler */ unsigned int nr; - unsigned int irq; /* * Core PCMCIA state -- cgit v0.10.2 From 55a19b39acb8888af8e9cfe5b762d03c52fdb48c Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 29 Oct 2009 00:54:49 +0100 Subject: pcmcia/staging: update comedi drivers Update comedi PCMCIA drivers to work with recent PCMCIA changes documented in Documentation/pcmcia/driver-changes.txt: - use pcmcia_config_loop() - don't use PCMCIA_DEBUG, but use dev_dbg() - don't use cs_error() - re-use prod_id and card_id values already stored Acked-by: Greg Kroah-Hartman Signed-off-by: Dominik Brodowski diff --git a/drivers/staging/comedi/drivers/cb_das16_cs.c b/drivers/staging/comedi/drivers/cb_das16_cs.c index 80c0df8..9e75802 100644 --- a/drivers/staging/comedi/drivers/cb_das16_cs.c +++ b/drivers/staging/comedi/drivers/cb_das16_cs.c @@ -141,37 +141,14 @@ static int das16cs_timer_insn_config(struct comedi_device *dev, struct comedi_insn *insn, unsigned int *data); -static int get_prodid(struct comedi_device *dev, struct pcmcia_device *link) -{ - tuple_t tuple; - u_short buf[128]; - int prodid = 0; - - tuple.TupleData = (cisdata_t *) buf; - tuple.TupleOffset = 0; - tuple.TupleDataMax = 255; - tuple.DesiredTuple = CISTPL_MANFID; - tuple.Attributes = TUPLE_RETURN_COMMON; - if ((pcmcia_get_first_tuple(link, &tuple) == 0) && - (pcmcia_get_tuple_data(link, &tuple) == 0)) { - prodid = le16_to_cpu(buf[1]); - } - - return prodid; -} - static const struct das16cs_board *das16cs_probe(struct comedi_device *dev, struct pcmcia_device *link) { - int id; int i; - id = get_prodid(dev, link); - for (i = 0; i < n_boards; i++) { - if (das16cs_boards[i].device_id == id) { + if (das16cs_boards[i].device_id == link->card_id) return das16cs_boards + i; - } } printk("unknown board!\n"); @@ -660,27 +637,8 @@ static int das16cs_timer_insn_config(struct comedi_device *dev, ======================================================================*/ -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ #if defined(CONFIG_PCMCIA) || defined(CONFIG_PCMCIA_MODULE) -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = - "cb_das16_cs.c pcmcia code (David Schleef), modified from dummy_cs.c 1.31 2001/08/24 12:13:13 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif - -/*====================================================================*/ - static void das16cs_pcmcia_config(struct pcmcia_device *link); static void das16cs_pcmcia_release(struct pcmcia_device *link); static int das16cs_pcmcia_suspend(struct pcmcia_device *p_dev); @@ -733,7 +691,7 @@ static int das16cs_pcmcia_attach(struct pcmcia_device *link) { struct local_info_t *local; - DEBUG(0, "das16cs_pcmcia_attach()\n"); + dev_dbg(&link->dev, "das16cs_pcmcia_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -760,7 +718,7 @@ static int das16cs_pcmcia_attach(struct pcmcia_device *link) static void das16cs_pcmcia_detach(struct pcmcia_device *link) { - DEBUG(0, "das16cs_pcmcia_detach(0x%p)\n", link); + dev_dbg(&link->dev, "das16cs_pcmcia_detach\n"); if (link->dev_node) { ((struct local_info_t *)link->priv)->stop = 1; @@ -771,118 +729,55 @@ static void das16cs_pcmcia_detach(struct pcmcia_device *link) kfree(link->priv); } /* das16cs_pcmcia_detach */ -static void das16cs_pcmcia_config(struct pcmcia_device *link) -{ - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_fn, last_ret; - u_char buf[64]; - cistpl_cftable_entry_t dflt = { 0 }; - DEBUG(0, "das16cs_pcmcia_config(0x%p)\n", link); - - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_fn = GetFirstTuple; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret != 0) - goto cs_failed; - - last_fn = GetTupleData; - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret != 0) - goto cs_failed; - - last_fn = ParseTuple; - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret != 0) - goto cs_failed; - - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; +static int das16cs_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + if (cfg->index == 0) + return -EINVAL; - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_fn = GetFirstTuple; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) - goto cs_failed; - - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple)) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse)) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ -/* if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } -*/ - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io)) - goto next_entry; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; } + /* This reserves IO space but doesn't actually enable it */ + return pcmcia_request_io(p_dev, &p_dev->io); + } - /* If we got this far, we're cool! */ - break; + return 0; +} + +static void das16cs_pcmcia_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + int ret; -next_entry: - last_fn = GetNextTuple; + dev_dbg(&link->dev, "das16cs_pcmcia_config\n"); - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) - goto cs_failed; + ret = pcmcia_loop_config(link, das16cs_pcmcia_config_loop, NULL); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -891,21 +786,18 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_fn = RequestIRQ; - - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) - goto cs_failed; + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* This actually configures the PCMCIA socket -- setting up the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_fn = RequestConfiguration; - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) - goto cs_failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -930,14 +822,13 @@ next_entry: return; -cs_failed: - cs_error(link, last_fn, last_ret); +failed: das16cs_pcmcia_release(link); } /* das16cs_pcmcia_config */ static void das16cs_pcmcia_release(struct pcmcia_device *link) { - DEBUG(0, "das16cs_pcmcia_release(0x%p)\n", link); + dev_dbg(&link->dev, "das16cs_pcmcia_release\n"); pcmcia_disable_device(link); } /* das16cs_pcmcia_release */ @@ -983,14 +874,13 @@ struct pcmcia_driver das16cs_driver = { static int __init init_das16cs_pcmcia_cs(void) { - DEBUG(0, "%s\n", version); pcmcia_register_driver(&das16cs_driver); return 0; } static void __exit exit_das16cs_pcmcia_cs(void) { - DEBUG(0, "das16cs_pcmcia_cs: unloading\n"); + pr_debug("das16cs_pcmcia_cs: unloading\n"); pcmcia_unregister_driver(&das16cs_driver); } diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c index 9cab21e..384a77a 100644 --- a/drivers/staging/comedi/drivers/das08_cs.c +++ b/drivers/staging/comedi/drivers/das08_cs.c @@ -110,25 +110,6 @@ static int das08_cs_attach(struct comedi_device *dev, ======================================================================*/ -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static const char *version = - "das08.c pcmcia code (Frank Hess), modified from dummy_cs.c 1.31 2001/08/24 12:13:13 (David Hinds)"; -#else -#define DEBUG(n, args...) -#endif - -/*====================================================================*/ static void das08_pcmcia_config(struct pcmcia_device *link); static void das08_pcmcia_release(struct pcmcia_device *link); static int das08_pcmcia_suspend(struct pcmcia_device *p_dev); @@ -181,7 +162,7 @@ static int das08_pcmcia_attach(struct pcmcia_device *link) { struct local_info_t *local; - DEBUG(0, "das08_pcmcia_attach()\n"); + dev_dbg(&link->dev, "das08_pcmcia_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -224,7 +205,7 @@ static int das08_pcmcia_attach(struct pcmcia_device *link) static void das08_pcmcia_detach(struct pcmcia_device *link) { - DEBUG(0, "das08_pcmcia_detach(0x%p)\n", link); + dev_dbg(&link->dev, "das08_pcmcia_detach\n"); if (link->dev_node) { ((struct local_info_t *)link->priv)->stop = 1; @@ -237,6 +218,44 @@ static void das08_pcmcia_detach(struct pcmcia_device *link) } /* das08_pcmcia_detach */ + +static int das08_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + if (cfg->index == 0) + return -ENODEV; + + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } + /* This reserves IO space but doesn't actually enable it */ + return pcmcia_request_io(p_dev, &p_dev->io); + } + return 0; +} + + /*====================================================================== das08_pcmcia_config() is scheduled to run after a CARD_INSERTION event @@ -248,128 +267,20 @@ static void das08_pcmcia_detach(struct pcmcia_device *link) static void das08_pcmcia_config(struct pcmcia_device *link) { struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_fn, last_ret; - u_char buf[64]; - cistpl_cftable_entry_t dflt = { 0 }; - - DEBUG(0, "das08_pcmcia_config(0x%p)\n", link); - - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - last_fn = GetFirstTuple; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) - goto cs_failed; - - last_fn = GetTupleData; - - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) - goto cs_failed; - - last_fn = ParseTuple; - - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) - goto cs_failed; - - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; - - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_fn = GetFirstTuple; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) - goto cs_failed; - - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) - goto next_entry; - - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ -/* if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } -*/ - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io) != 0) - goto next_entry; - } - - /* If we got this far, we're cool! */ - break; + int ret; -next_entry: - last_fn = GetNextTuple; + dev_dbg(&link->dev, "das08_pcmcia_config\n"); - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) - goto cs_failed; + ret = pcmcia_loop_config(link, das08_pcmcia_config_loop, NULL); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_fn = RequestIRQ; - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) - goto cs_failed; + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -377,10 +288,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_fn = RequestConfiguration; - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) - goto cs_failed; + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -405,8 +315,7 @@ next_entry: return; -cs_failed: - cs_error(link, last_fn, last_ret); +failed: das08_pcmcia_release(link); } /* das08_pcmcia_config */ @@ -421,7 +330,7 @@ cs_failed: static void das08_pcmcia_release(struct pcmcia_device *link) { - DEBUG(0, "das08_pcmcia_release(0x%p)\n", link); + dev_dbg(&link->dev, "das08_pcmcia_release\n"); pcmcia_disable_device(link); } /* das08_pcmcia_release */ @@ -477,14 +386,13 @@ struct pcmcia_driver das08_cs_driver = { static int __init init_das08_pcmcia_cs(void) { - DEBUG(0, "%s\n", version); pcmcia_register_driver(&das08_cs_driver); return 0; } static void __exit exit_das08_pcmcia_cs(void) { - DEBUG(0, "das08_pcmcia_cs: unloading\n"); + pr_debug("das08_pcmcia_cs: unloading\n"); pcmcia_unregister_driver(&das08_cs_driver); } diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index ec31a39..e06d5b2 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -436,25 +436,7 @@ static int dio700_detach(struct comedi_device *dev) return 0; }; -/* PCMCIA crap */ - -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "ni_daq_700.c, based on dummy_cs.c"; -#else -#define DEBUG(n, args...) -#endif - -/*====================================================================*/ +/* PCMCIA crap -- watch your words, please! */ static void dio700_config(struct pcmcia_device *link); static void dio700_release(struct pcmcia_device *link); @@ -510,7 +492,7 @@ static int dio700_cs_attach(struct pcmcia_device *link) printk(KERN_INFO "ni_daq_700: cs-attach\n"); - DEBUG(0, "dio700_cs_attach()\n"); + dev_dbg(&link->dev, "dio700_cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -555,7 +537,7 @@ static void dio700_cs_detach(struct pcmcia_device *link) printk(KERN_INFO "ni_daq_700: cs-detach!\n"); - DEBUG(0, "dio700_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "dio700_cs_detach\n"); if (link->dev_node) { ((struct local_info_t *)link->priv)->stop = 1; @@ -576,141 +558,85 @@ static void dio700_cs_detach(struct pcmcia_device *link) ======================================================================*/ -static void dio700_config(struct pcmcia_device *link) +static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) { - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_ret; - u_char buf[64]; - win_req_t req; + win_req_t *req = priv_data; memreq_t map; - cistpl_cftable_entry_t dflt = { 0 }; - printk(KERN_INFO "ni_daq_700: cs-config\n"); - - DEBUG(0, "dio700_config(0x%p)\n", link); + if (cfg->index == 0) + return -ENODEV; - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; + /* Does this card need audio output? */ + if (cfg->flags & CISTPL_CFTABLE_AUDIO) { + p_dev->conf.Attributes |= CONF_ENABLE_SPKR; + p_dev->conf.Status = CCSR_AUDIO_ENA; } - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) { - cs_error(link, GetTupleData, last_ret); - goto cs_failed; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } + /* This reserves IO space but doesn't actually enable it */ + if (pcmcia_request_io(p_dev, &p_dev->io) != 0) + return -ENODEV; } - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) { - cs_error(link, ParseTuple, last_ret); - goto cs_failed; + if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) { + cistpl_mem_t *mem = + (cfg->mem.nwin) ? &cfg->mem : &dflt->mem; + req->Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; + req->Attributes |= WIN_ENABLE; + req->Base = mem->win[0].host_addr; + req->Size = mem->win[0].len; + if (req->Size < 0x1000) + req->Size = 0x1000; + req->AccessSpeed = 0; + if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + return -ENODEV; + map.Page = 0; + map.CardOffset = mem->win[0].card_addr; + if (pcmcia_map_mem_page(p_dev->win, &map)) + return -ENODEV; } - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; - - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret != 0) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; - } - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple) != 0) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse) != 0) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ - if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } + /* If we got this far, we're cool! */ + return 0; +} - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io) != 0) - goto next_entry; - } +static void dio700_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + win_req_t req; + int ret; - if ((cfg->mem.nwin > 0) || (dflt.mem.nwin > 0)) { - cistpl_mem_t *mem = - (cfg->mem.nwin) ? &cfg->mem : &dflt.mem; - req.Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; - req.Attributes |= WIN_ENABLE; - req.Base = mem->win[0].host_addr; - req.Size = mem->win[0].len; - if (req.Size < 0x1000) - req.Size = 0x1000; - req.AccessSpeed = 0; - if (pcmcia_request_window(&link, &req, &link->win)) - goto next_entry; - map.Page = 0; - map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(link->win, &map)) - goto next_entry; - } - /* If we got this far, we're cool! */ - break; + printk(KERN_INFO "ni_daq_700: cs-config\n"); -next_entry: + dev_dbg(&link->dev, "dio700_config\n"); - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetNextTuple, last_ret); - goto cs_failed; - } + ret = pcmcia_loop_config(link, dio700_pcmcia_config_loop, &req); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -719,11 +645,9 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) { - cs_error(link, RequestIRQ, last_ret); - goto cs_failed; - } + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -731,11 +655,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret != 0) { - cs_error(link, RequestConfiguration, last_ret); - goto cs_failed; - } + ret = pcmcia_request_configuration(link, &link->conf); + if (ret != 0) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -763,7 +685,7 @@ next_entry: return; -cs_failed: +failed: printk(KERN_INFO "ni_daq_700 cs failed"); dio700_release(link); @@ -771,7 +693,7 @@ cs_failed: static void dio700_release(struct pcmcia_device *link) { - DEBUG(0, "dio700_release(0x%p)\n", link); + dev_dbg(&link->dev, "dio700_release\n"); pcmcia_disable_device(link); } /* dio700_release */ @@ -830,15 +752,13 @@ struct pcmcia_driver dio700_cs_driver = { static int __init init_dio700_cs(void) { - printk("ni_daq_700: cs-init \n"); - DEBUG(0, "%s\n", version); pcmcia_register_driver(&dio700_cs_driver); return 0; } static void __exit exit_dio700_cs(void) { - DEBUG(0, "ni_daq_700: unloading\n"); + pr_debug("ni_daq_700: unloading\n"); pcmcia_unregister_driver(&dio700_cs_driver); } diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c index 0700a8b..9257a42 100644 --- a/drivers/staging/comedi/drivers/ni_daq_dio24.c +++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c @@ -187,25 +187,7 @@ static int dio24_detach(struct comedi_device *dev) return 0; }; -/* PCMCIA crap */ - -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "ni_daq_dio24.c, based on dummy_cs.c"; -#else -#define DEBUG(n, args...) -#endif - -/*====================================================================*/ +/* PCMCIA crap -- watch your words! */ static void dio24_config(struct pcmcia_device *link); static void dio24_release(struct pcmcia_device *link); @@ -261,7 +243,7 @@ static int dio24_cs_attach(struct pcmcia_device *link) printk(KERN_INFO "ni_daq_dio24: HOLA SOY YO - CS-attach!\n"); - DEBUG(0, "dio24_cs_attach()\n"); + dev_dbg(&link->dev, "dio24_cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -306,7 +288,7 @@ static void dio24_cs_detach(struct pcmcia_device *link) printk(KERN_INFO "ni_daq_dio24: HOLA SOY YO - cs-detach!\n"); - DEBUG(0, "dio24_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "dio24_cs_detach\n"); if (link->dev_node) { ((struct local_info_t *)link->priv)->stop = 1; @@ -327,142 +309,85 @@ static void dio24_cs_detach(struct pcmcia_device *link) ======================================================================*/ -static void dio24_config(struct pcmcia_device *link) +static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) { - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_ret; - u_char buf[64]; - win_req_t req; + win_req_t *req = priv_data; memreq_t map; - cistpl_cftable_entry_t dflt = { 0 }; - printk(KERN_INFO "ni_daq_dio24: HOLA SOY YO! - config\n"); - - DEBUG(0, "dio24_config(0x%p)\n", link); - - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; - } + if (cfg->index == 0) + return -ENODEV; - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) { - cs_error(link, GetTupleData, last_ret); - goto cs_failed; + /* Does this card need audio output? */ + if (cfg->flags & CISTPL_CFTABLE_AUDIO) { + p_dev->conf.Attributes |= CONF_ENABLE_SPKR; + p_dev->conf.Status = CCSR_AUDIO_ENA; } - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) { - cs_error(link, ParseTuple, last_ret); - goto cs_failed; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } + /* This reserves IO space but doesn't actually enable it */ + if (pcmcia_request_io(p_dev, &p_dev->io) != 0) + return -ENODEV; } - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; - - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; + if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) { + cistpl_mem_t *mem = + (cfg->mem.nwin) ? &cfg->mem : &dflt->mem; + req->Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; + req->Attributes |= WIN_ENABLE; + req->Base = mem->win[0].host_addr; + req->Size = mem->win[0].len; + if (req->Size < 0x1000) + req->Size = 0x1000; + req->AccessSpeed = 0; + if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + return -ENODEV; + map.Page = 0; + map.CardOffset = mem->win[0].card_addr; + if (pcmcia_map_mem_page(p_dev->win, &map)) + return -ENODEV; } - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple) != 0) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse) != 0) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ - if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } + /* If we got this far, we're cool! */ + return 0; +} - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io) != 0) - goto next_entry; - } +static void dio24_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + int ret; + win_req_t req; - if ((cfg->mem.nwin > 0) || (dflt.mem.nwin > 0)) { - cistpl_mem_t *mem = - (cfg->mem.nwin) ? &cfg->mem : &dflt.mem; - req.Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; - req.Attributes |= WIN_ENABLE; - req.Base = mem->win[0].host_addr; - req.Size = mem->win[0].len; - if (req.Size < 0x1000) - req.Size = 0x1000; - req.AccessSpeed = 0; - if (pcmcia_request_window(&link, &req, &link->win)) - goto next_entry; - map.Page = 0; - map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(link->win, &map)) - goto next_entry; - } - /* If we got this far, we're cool! */ - break; + printk(KERN_INFO "ni_daq_dio24: HOLA SOY YO! - config\n"); -next_entry: + dev_dbg(&link->dev, "dio24_config\n"); - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetNextTuple, last_ret); - goto cs_failed; - } + ret = pcmcia_loop_config(link, dio24_pcmcia_config_loop, &req); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -471,11 +396,9 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) { - cs_error(link, RequestIRQ, last_ret); - goto cs_failed; - } + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -483,11 +406,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) { - cs_error(link, RequestConfiguration, last_ret); - goto cs_failed; - } + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -515,7 +436,7 @@ next_entry: return; -cs_failed: +failed: printk(KERN_INFO "Fallo"); dio24_release(link); @@ -523,7 +444,7 @@ cs_failed: static void dio24_release(struct pcmcia_device *link) { - DEBUG(0, "dio24_release(0x%p)\n", link); + dev_dbg(&link->dev, "dio24_release\n"); pcmcia_disable_device(link); } /* dio24_release */ @@ -582,14 +503,12 @@ struct pcmcia_driver dio24_cs_driver = { static int __init init_dio24_cs(void) { printk("ni_daq_dio24: HOLA SOY YO!\n"); - DEBUG(0, "%s\n", version); pcmcia_register_driver(&dio24_cs_driver); return 0; } static void __exit exit_dio24_cs(void) { - DEBUG(0, "ni_dio24: unloading\n"); pcmcia_unregister_driver(&dio24_cs_driver); } diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c index a3053b8..28ff4a6 100644 --- a/drivers/staging/comedi/drivers/ni_labpc_cs.c +++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c @@ -153,23 +153,6 @@ static int labpc_attach(struct comedi_device *dev, struct comedi_devconfig *it) return labpc_common_attach(dev, iobase, irq, 0); } -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static const char *version = - "ni_labpc.c, based on dummy_cs.c 1.31 2001/08/24 12:13:13"; -#else -#define DEBUG(n, args...) -#endif - /*====================================================================*/ /* @@ -236,7 +219,7 @@ static int labpc_cs_attach(struct pcmcia_device *link) { struct local_info_t *local; - DEBUG(0, "labpc_cs_attach()\n"); + dev_dbg(&link->dev, "labpc_cs_attach()\n"); /* Allocate space for private device-specific data */ local = kzalloc(sizeof(struct local_info_t), GFP_KERNEL); @@ -278,7 +261,7 @@ static int labpc_cs_attach(struct pcmcia_device *link) static void labpc_cs_detach(struct pcmcia_device *link) { - DEBUG(0, "labpc_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "labpc_cs_detach\n"); /* If the device is currently configured and active, we won't @@ -305,135 +288,84 @@ static void labpc_cs_detach(struct pcmcia_device *link) ======================================================================*/ -static void labpc_config(struct pcmcia_device *link) +static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) { - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_ret; - u_char buf[64]; - win_req_t req; + win_req_t *req = priv_data; memreq_t map; - cistpl_cftable_entry_t dflt = { 0 }; - DEBUG(0, "labpc_config(0x%p)\n", link); + if (cfg->index == 0) + return -ENODEV; - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; + /* Does this card need audio output? */ + if (cfg->flags & CISTPL_CFTABLE_AUDIO) { + p_dev->conf.Attributes |= CONF_ENABLE_SPKR; + p_dev->conf.Status = CCSR_AUDIO_ENA; } - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) { - cs_error(link, GetTupleData, last_ret); - goto cs_failed; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } + /* This reserves IO space but doesn't actually enable it */ + if (pcmcia_request_io(p_dev, &p_dev->io) != 0) + return -ENODEV; } - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) { - cs_error(link, ParseTuple, last_ret); - goto cs_failed; + if ((cfg->mem.nwin > 0) || (dflt->mem.nwin > 0)) { + cistpl_mem_t *mem = + (cfg->mem.nwin) ? &cfg->mem : &dflt->mem; + req->Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; + req->Attributes |= WIN_ENABLE; + req->Base = mem->win[0].host_addr; + req->Size = mem->win[0].len; + if (req->Size < 0x1000) + req->Size = 0x1000; + req->AccessSpeed = 0; + if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + return -ENODEV; + map.Page = 0; + map.CardOffset = mem->win[0].card_addr; + if (pcmcia_map_mem_page(p_dev->win, &map)) + return -ENODEV; } - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; + /* If we got this far, we're cool! */ + return 0; +} - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; - } - while (1) { - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple)) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse)) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Does this card need audio output? */ - if (cfg->flags & CISTPL_CFTABLE_AUDIO) { - link->conf.Attributes |= CONF_ENABLE_SPKR; - link->conf.Status = CCSR_AUDIO_ENA; - } - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io)) - goto next_entry; - } +static void labpc_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + int ret; + win_req_t req; - if ((cfg->mem.nwin > 0) || (dflt.mem.nwin > 0)) { - cistpl_mem_t *mem = - (cfg->mem.nwin) ? &cfg->mem : &dflt.mem; - req.Attributes = WIN_DATA_WIDTH_16 | WIN_MEMORY_TYPE_CM; - req.Attributes |= WIN_ENABLE; - req.Base = mem->win[0].host_addr; - req.Size = mem->win[0].len; - if (req.Size < 0x1000) - req.Size = 0x1000; - req.AccessSpeed = 0; - link->win = (window_handle_t) link; - if (pcmcia_request_window(&link, &req, &link->win)) - goto next_entry; - map.Page = 0; - map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(link->win, &map)) - goto next_entry; - } - /* If we got this far, we're cool! */ - break; + dev_dbg(&link->dev, "labpc_config\n"); -next_entry: - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetNextTuple, last_ret); - goto cs_failed; - } + ret = pcmcia_loop_config(link, labpc_pcmcia_config_loop, &req); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -442,11 +374,9 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) { - cs_error(link, RequestIRQ, last_ret); - goto cs_failed; - } + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -454,11 +384,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) { - cs_error(link, RequestConfiguration, last_ret); - goto cs_failed; - } + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -486,14 +414,14 @@ next_entry: return; -cs_failed: +failed: labpc_release(link); } /* labpc_config */ static void labpc_release(struct pcmcia_device *link) { - DEBUG(0, "labpc_release(0x%p)\n", link); + dev_dbg(&link->dev, "labpc_release\n"); pcmcia_disable_device(link); } /* labpc_release */ @@ -551,14 +479,12 @@ struct pcmcia_driver labpc_cs_driver = { static int __init init_labpc_cs(void) { - DEBUG(0, "%s\n", version); pcmcia_register_driver(&labpc_cs_driver); return 0; } static void __exit exit_labpc_cs(void) { - DEBUG(0, "ni_labpc: unloading\n"); pcmcia_unregister_driver(&labpc_cs_driver); } diff --git a/drivers/staging/comedi/drivers/ni_mio_cs.c b/drivers/staging/comedi/drivers/ni_mio_cs.c index 9aef87f..ca7ab4a 100644 --- a/drivers/staging/comedi/drivers/ni_mio_cs.c +++ b/drivers/staging/comedi/drivers/ni_mio_cs.c @@ -312,96 +312,50 @@ static int mio_cs_resume(struct pcmcia_device *link) return 0; } -static void mio_cs_config(struct pcmcia_device *link) -{ - tuple_t tuple; - u_short buf[128]; - cisparse_t parse; - int manfid = 0, prodid = 0; - int ret; - DPRINTK("mio_cs_config(link=%p)\n", link); - - tuple.TupleData = (cisdata_t *) buf; - tuple.TupleOffset = 0; - tuple.TupleDataMax = 255; - tuple.Attributes = 0; +static int mio_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + int base, ret; - tuple.DesiredTuple = CISTPL_CONFIG; - ret = pcmcia_get_first_tuple(link, &tuple); - ret = pcmcia_get_tuple_data(link, &tuple); - ret = pcmcia_parse_tuple(&tuple, &parse); - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; + p_dev->io.NumPorts1 = cfg->io.win[0].len; + p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; + p_dev->io.NumPorts2 = 0; -#if 0 - tuple.DesiredTuple = CISTPL_LONGLINK_MFC; - tuple.Attributes = TUPLE_RETURN_COMMON | TUPLE_RETURN_LINK; - info->multi(first_tuple(link, &tuple, &parse) == 0); -#endif + p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1; + p_dev->irq.IRQInfo2 = cfg->irq.IRQInfo2; - tuple.DesiredTuple = CISTPL_MANFID; - tuple.Attributes = TUPLE_RETURN_COMMON; - if ((pcmcia_get_first_tuple(link, &tuple) == 0) && - (pcmcia_get_tuple_data(link, &tuple) == 0)) { - manfid = le16_to_cpu(buf[0]); - prodid = le16_to_cpu(buf[1]); + for (base = 0x000; base < 0x400; base += 0x20) { + p_dev->io.BasePort1 = base; + ret = pcmcia_request_io(p_dev, &p_dev->io); + if (!ret) + return 0; } - /* printk("manfid = 0x%04x, 0x%04x\n",manfid,prodid); */ + return -ENODEV; +} - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - tuple.Attributes = 0; - ret = pcmcia_get_first_tuple(link, &tuple); - ret = pcmcia_get_tuple_data(link, &tuple); - ret = pcmcia_parse_tuple(&tuple, &parse); -#if 0 - printk(" index: 0x%x\n", parse.cftable_entry.index); - printk(" flags: 0x%x\n", parse.cftable_entry.flags); - printk(" io flags: 0x%x\n", parse.cftable_entry.io.flags); - printk(" io nwin: 0x%x\n", parse.cftable_entry.io.nwin); - printk(" io base: 0x%x\n", parse.cftable_entry.io.win[0].base); - printk(" io len: 0x%x\n", parse.cftable_entry.io.win[0].len); - printk(" irq1: 0x%x\n", parse.cftable_entry.irq.IRQInfo1); - printk(" irq2: 0x%x\n", parse.cftable_entry.irq.IRQInfo2); - printk(" mem flags: 0x%x\n", parse.cftable_entry.mem.flags); - printk(" mem nwin: 0x%x\n", parse.cftable_entry.mem.nwin); - printk(" subtuples: 0x%x\n", parse.cftable_entry.subtuples); -#endif +static void mio_cs_config(struct pcmcia_device *link) +{ + int ret; -#if 0 - link->io.NumPorts1 = 0x20; - link->io.IOAddrLines = 5; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; -#endif - link->io.NumPorts1 = parse.cftable_entry.io.win[0].len; - link->io.IOAddrLines = - parse.cftable_entry.io.flags & CISTPL_IO_LINES_MASK; - link->io.NumPorts2 = 0; + DPRINTK("mio_cs_config(link=%p)\n", link); - { - int base; - for (base = 0x000; base < 0x400; base += 0x20) { - link->io.BasePort1 = base; - ret = pcmcia_request_io(link, &link->io); - /* printk("RequestIO 0x%02x\n",ret); */ - if (!ret) - break; - } + ret = pcmcia_loop_config(link, mio_pcmcia_config_loop, NULL); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + return; } - link->irq.IRQInfo1 = parse.cftable_entry.irq.IRQInfo1; - link->irq.IRQInfo2 = parse.cftable_entry.irq.IRQInfo2; ret = pcmcia_request_irq(link, &link->irq); if (ret) { printk("pcmcia_request_irq() returned error: %i\n", ret); } - /* printk("RequestIRQ 0x%02x\n",ret); */ - - link->conf.ConfigIndex = 1; ret = pcmcia_request_configuration(link, &link->conf); - /* printk("RequestConfiguration %d\n",ret); */ link->dev_node = &dev_node; } @@ -475,40 +429,17 @@ static int mio_cs_attach(struct comedi_device *dev, struct comedi_devconfig *it) return 0; } -static int get_prodid(struct comedi_device *dev, struct pcmcia_device *link) -{ - tuple_t tuple; - u_short buf[128]; - int prodid = 0; - - tuple.TupleData = (cisdata_t *) buf; - tuple.TupleOffset = 0; - tuple.TupleDataMax = 255; - tuple.DesiredTuple = CISTPL_MANFID; - tuple.Attributes = TUPLE_RETURN_COMMON; - if ((pcmcia_get_first_tuple(link, &tuple) == 0) && - (pcmcia_get_tuple_data(link, &tuple) == 0)) { - prodid = le16_to_cpu(buf[1]); - } - - return prodid; -} - static int ni_getboardtype(struct comedi_device *dev, struct pcmcia_device *link) { - int id; int i; - id = get_prodid(dev, link); - for (i = 0; i < n_ni_boards; i++) { - if (ni_boards[i].device_id == id) { + if (ni_boards[i].device_id == link->card_id) return i; - } } - printk("unknown board 0x%04x -- pretend it is a ", id); + printk("unknown board 0x%04x -- pretend it is a ", link->card_id); return 0; } diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c index 344b823..48e7c27 100644 --- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c +++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c @@ -55,23 +55,6 @@ Devices: [Quatech] DAQP-208 (daqp), DAQP-308 #include #include -/* - All the PCMCIA modules use PCMCIA_DEBUG to control debugging. If - you do not define PCMCIA_DEBUG at all, all the debug code will be - left out. If you compile with PCMCIA_DEBUG=0, the debug code will - be present but disabled -- but it can then be enabled for specific - modules at load time with a 'pc_debug=#' option to insmod. -*/ - -#ifdef PCMCIA_DEBUG -static int pc_debug = PCMCIA_DEBUG; -module_param(pc_debug, int, 0644); -#define DEBUG(n, args...) if (pc_debug>(n)) printk(KERN_DEBUG args) -static char *version = "quatech_daqp_cs.c 1.10 2003/04/21 (Brent Baccala)"; -#else -#define DEBUG(n, args...) -#endif - /* Maximum number of separate DAQP devices we'll allow */ #define MAX_DEV 4 @@ -863,8 +846,6 @@ static int daqp_attach(struct comedi_device *dev, struct comedi_devconfig *it) { int ret; struct local_info_t *local = dev_table[it->options[0]]; - tuple_t tuple; - int i; struct comedi_subdevice *s; if (it->options[0] < 0 || it->options[0] >= MAX_DEV || !local) { @@ -883,29 +864,10 @@ static int daqp_attach(struct comedi_device *dev, struct comedi_devconfig *it) strcpy(local->board_name, "DAQP"); dev->board_name = local->board_name; - - tuple.DesiredTuple = CISTPL_VERS_1; - if (pcmcia_get_first_tuple(local->link, &tuple) == 0) { - u_char buf[128]; - - buf[0] = buf[sizeof(buf) - 1] = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 2; - if (pcmcia_get_tuple_data(local->link, &tuple) == 0) { - - for (i = 0; i < tuple.TupleDataLen - 4; i++) - if (buf[i] == 0) - break; - for (i++; i < tuple.TupleDataLen - 4; i++) - if (buf[i] == 0) - break; - i++; - if ((i < tuple.TupleDataLen - 4) - && (strncmp(buf + i, "DAQP", 4) == 0)) { - strncpy(local->board_name, buf + i, - sizeof(local->board_name)); - } + if (local->link->prod_id[2]) { + if (strncmp(local->link->prod_id[2], "DAQP", 4) == 0) { + strncpy(local->board_name, local->link->prod_id[2], + sizeof(local->board_name)); } } @@ -1058,7 +1020,7 @@ static int daqp_cs_attach(struct pcmcia_device *link) struct local_info_t *local; int i; - DEBUG(0, "daqp_cs_attach()\n"); + dev_dbg(&link->dev, "daqp_cs_attach()\n"); for (i = 0; i < MAX_DEV; i++) if (dev_table[i] == NULL) @@ -1112,7 +1074,7 @@ static void daqp_cs_detach(struct pcmcia_device *link) { struct local_info_t *dev = link->priv; - DEBUG(0, "daqp_cs_detach(0x%p)\n", link); + dev_dbg(&link->dev, "daqp_cs_detach\n"); if (link->dev_node) { dev->stop = 1; @@ -1134,115 +1096,54 @@ static void daqp_cs_detach(struct pcmcia_device *link) ======================================================================*/ -static void daqp_cs_config(struct pcmcia_device *link) -{ - struct local_info_t *dev = link->priv; - tuple_t tuple; - cisparse_t parse; - int last_ret; - u_char buf[64]; - - DEBUG(0, "daqp_cs_config(0x%p)\n", link); - - /* - This reads the card's CONFIG tuple to find its configuration - registers. - */ - tuple.DesiredTuple = CISTPL_CONFIG; - tuple.Attributes = 0; - tuple.TupleData = buf; - tuple.TupleDataMax = sizeof(buf); - tuple.TupleOffset = 0; - - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; - } - last_ret = pcmcia_get_tuple_data(link, &tuple); - if (last_ret) { - cs_error(link, GetTupleData, last_ret); - goto cs_failed; - } - - last_ret = pcmcia_parse_tuple(&tuple, &parse); - if (last_ret) { - cs_error(link, ParseTuple, last_ret); - goto cs_failed; - } - link->conf.ConfigBase = parse.config.base; - link->conf.Present = parse.config.rmask[0]; +static int daqp_pcmcia_config_loop(struct pcmcia_device *p_dev, + cistpl_cftable_entry_t *cfg, + cistpl_cftable_entry_t *dflt, + unsigned int vcc, + void *priv_data) +{ + if (cfg->index == 0) + return -ENODEV; - /* - In this loop, we scan the CIS for configuration table entries, - each of which describes a valid card configuration, including - voltage, IO window, memory window, and interrupt settings. - - We make no assumptions about the card to be configured: we use - just the information available in the CIS. In an ideal world, - this would work for any PCMCIA card, but it requires a complete - and accurate CIS. In practice, a driver usually "knows" most of - these things without consulting the CIS, and most client drivers - will only use the CIS to fill in implementation-defined details. - */ - tuple.DesiredTuple = CISTPL_CFTABLE_ENTRY; - last_ret = pcmcia_get_first_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetFirstTuple, last_ret); - goto cs_failed; + /* Do we need to allocate an interrupt? */ + if (cfg->irq.IRQInfo1 || dflt->irq.IRQInfo1) + p_dev->conf.Attributes |= CONF_ENABLE_IRQ; + + /* IO window settings */ + p_dev->io.NumPorts1 = p_dev->io.NumPorts2 = 0; + if ((cfg->io.nwin > 0) || (dflt->io.nwin > 0)) { + cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt->io; + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; + if (!(io->flags & CISTPL_IO_8BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_16; + if (!(io->flags & CISTPL_IO_16BIT)) + p_dev->io.Attributes1 = IO_DATA_PATH_WIDTH_8; + p_dev->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; + p_dev->io.BasePort1 = io->win[0].base; + p_dev->io.NumPorts1 = io->win[0].len; + if (io->nwin > 1) { + p_dev->io.Attributes2 = p_dev->io.Attributes1; + p_dev->io.BasePort2 = io->win[1].base; + p_dev->io.NumPorts2 = io->win[1].len; + } } - while (1) { - cistpl_cftable_entry_t dflt = { 0 }; - cistpl_cftable_entry_t *cfg = &(parse.cftable_entry); - if (pcmcia_get_tuple_data(link, &tuple)) - goto next_entry; - if (pcmcia_parse_tuple(&tuple, &parse)) - goto next_entry; - - if (cfg->flags & CISTPL_CFTABLE_DEFAULT) - dflt = *cfg; - if (cfg->index == 0) - goto next_entry; - link->conf.ConfigIndex = cfg->index; - - /* Do we need to allocate an interrupt? */ - if (cfg->irq.IRQInfo1 || dflt.irq.IRQInfo1) - link->conf.Attributes |= CONF_ENABLE_IRQ; - - /* IO window settings */ - link->io.NumPorts1 = link->io.NumPorts2 = 0; - if ((cfg->io.nwin > 0) || (dflt.io.nwin > 0)) { - cistpl_io_t *io = (cfg->io.nwin) ? &cfg->io : &dflt.io; - link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; - if (!(io->flags & CISTPL_IO_8BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - if (!(io->flags & CISTPL_IO_16BIT)) - link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; - link->io.IOAddrLines = io->flags & CISTPL_IO_LINES_MASK; - link->io.BasePort1 = io->win[0].base; - link->io.NumPorts1 = io->win[0].len; - if (io->nwin > 1) { - link->io.Attributes2 = link->io.Attributes1; - link->io.BasePort2 = io->win[1].base; - link->io.NumPorts2 = io->win[1].len; - } - } + /* This reserves IO space but doesn't actually enable it */ + return pcmcia_request_io(p_dev, &p_dev->io); +} - /* This reserves IO space but doesn't actually enable it */ - if (pcmcia_request_io(link, &link->io)) - goto next_entry; +static void daqp_cs_config(struct pcmcia_device *link) +{ + struct local_info_t *dev = link->priv; + int ret; - /* If we got this far, we're cool! */ - break; + dev_dbg(&link->dev, "daqp_cs_config\n"); -next_entry: - last_ret = pcmcia_get_next_tuple(link, &tuple); - if (last_ret) { - cs_error(link, GetNextTuple, last_ret); - goto cs_failed; - } + ret = pcmcia_loop_config(link, daqp_pcmcia_config_loop, NULL); + if (ret) { + dev_warn(&link->dev, "no configuration found\n"); + goto failed; } /* @@ -1251,11 +1152,9 @@ next_entry: irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - last_ret = pcmcia_request_irq(link, &link->irq); - if (last_ret) { - cs_error(link, RequestIRQ, last_ret); - goto cs_failed; - } + ret = pcmcia_request_irq(link, &link->irq); + if (ret) + goto failed; } /* @@ -1263,11 +1162,9 @@ next_entry: the I/O windows and the interrupt mapping, and putting the card and host interface into "Memory and IO" mode. */ - last_ret = pcmcia_request_configuration(link, &link->conf); - if (last_ret) { - cs_error(link, RequestConfiguration, last_ret); - goto cs_failed; - } + ret = pcmcia_request_configuration(link, &link->conf); + if (ret) + goto failed; /* At this point, the dev_node_t structure(s) need to be @@ -1296,14 +1193,14 @@ next_entry: return; -cs_failed: +failed: daqp_cs_release(link); } /* daqp_cs_config */ static void daqp_cs_release(struct pcmcia_device *link) { - DEBUG(0, "daqp_cs_release(0x%p)\n", link); + dev_dbg(&link->dev, "daqp_cs_release\n"); pcmcia_disable_device(link); } /* daqp_cs_release */ @@ -1363,7 +1260,6 @@ struct pcmcia_driver daqp_cs_driver = { int __init init_module(void) { - DEBUG(0, "%s\n", version); pcmcia_register_driver(&daqp_cs_driver); comedi_driver_register(&driver_daqp); return 0; @@ -1371,7 +1267,6 @@ int __init init_module(void) void __exit cleanup_module(void) { - DEBUG(0, "daqp_cs: unloading\n"); comedi_driver_unregister(&driver_daqp); pcmcia_unregister_driver(&daqp_cs_driver); } -- cgit v0.10.2 From 638bba55fe6440439005f02fcd6b0c1f908d0d11 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 7 Nov 2009 12:26:17 +0100 Subject: pcmcia: autoload module pcmcia Attempt to load the "pcmcia" module for 16-bit PCMCIA cards, so that PCMCIA support becomes available without pcmciautils/udev userspace interaction. Based on a suggestion and a patch Signed-off-by: Komuro but converted it to request_module_nowait() and move it to a later stage. Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index b229f6d..fc1c0c6 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -243,6 +243,13 @@ int pcmcia_register_socket(struct pcmcia_socket *socket) pcmcia_parse_events(socket, SS_DETECT); + /* + * Let's try to get the PCMCIA module for 16-bit PCMCIA support. + * If it fails, it doesn't matter -- we still have 32-bit CardBus + * support to offer, so this is not a failure mode. + */ + request_module_nowait("pcmcia"); + return 0; err: -- cgit v0.10.2 From f84d49b218b7d4c6cba2e0b41f24bd4045403962 Mon Sep 17 00:00:00 2001 From: Naohiro Ooiwa Date: Mon, 9 Nov 2009 00:46:42 +0900 Subject: signal: Print warning message when dropping signals When the system has too many timers or too many aggregate queued signals, the EAGAIN error is returned to application from kernel, including timer_create() [POSIX.1b]. It means that the app exceeded the limit of pending signals, but in general application writers do not expect this outcome and the current silent failure can cause rare app failures under very high load. This patch adds a new message when we reach the limit and if print_fatal_signals is enabled: task/1234: reached RLIMIT_SIGPENDING, dropping signal If you see this message and your system behaved unexpectedly, you can run following command to lift the limit: # ulimit -i unlimited With help from Hiroshi Shimamoto . Signed-off-by: Naohiro Ooiwa Cc: Andrew Morton Cc: Hiroshi Shimamoto Cc: Roland McGrath Cc: Peter Zijlstra Cc: oleg@redhat.com LKML-Reference: <4AF6E7E2.9080406@miraclelinux.com> [ Modified a few small details, gave surrounding code some love. ] Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9107b38..3bbd92f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2032,8 +2032,15 @@ and is between 256 and 4096 characters. It is defined in the file print-fatal-signals= [KNL] debug: print fatal signals - print-fatal-signals=1: print segfault info to - the kernel console. + + If enabled, warn about various signal handling + related application anomalies: too many signals, + too many POSIX.1 timers, fatal signals causing a + coredump - etc. + + If you hit the warning due to signal overflow, + you might want to try "ulimit -i unlimited". + default: off. printk.time= Show timing data prefixed to each printk message line diff --git a/kernel/signal.c b/kernel/signal.c index 6705320..fe08008 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ static struct kmem_cache *sigqueue_cachep; +int print_fatal_signals __read_mostly; + static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; @@ -159,7 +162,7 @@ int next_signal(struct sigpending *pending, sigset_t *mask) { unsigned long i, *s, *m, x; int sig = 0; - + s = pending->signal.sig; m = mask->sig; switch (_NSIG_WORDS) { @@ -184,17 +187,31 @@ int next_signal(struct sigpending *pending, sigset_t *mask) sig = ffz(~x) + 1; break; } - + return sig; } +static inline void print_dropped_signal(int sig) +{ + static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); + + if (!print_fatal_signals) + return; + + if (!__ratelimit(&ratelimit_state)) + return; + + printk(KERN_INFO "%s/%d: reached RLIMIT_SIGPENDING, dropped signal %d\n", + current->comm, current->pid, sig); +} + /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an * appopriate lock must be held to stop the target task from exiting */ -static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, - int override_rlimit) +static struct sigqueue * +__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) { struct sigqueue *q = NULL; struct user_struct *user; @@ -207,10 +224,15 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, */ user = get_uid(__task_cred(t)->user); atomic_inc(&user->sigpending); + if (override_rlimit || atomic_read(&user->sigpending) <= - t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) + t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) { q = kmem_cache_alloc(sigqueue_cachep, flags); + } else { + print_dropped_signal(sig); + } + if (unlikely(q == NULL)) { atomic_dec(&user->sigpending); free_uid(user); @@ -869,7 +891,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, else override_rlimit = 0; - q = __sigqueue_alloc(t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE, + q = __sigqueue_alloc(sig, t, GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE, override_rlimit); if (q) { list_add_tail(&q->list, &pending->list); @@ -925,8 +947,6 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, return __send_signal(sig, info, t, group, from_ancestor_ns); } -int print_fatal_signals; - static void print_fatal_signal(struct pt_regs *regs, int signr) { printk("%s/%d: potentially unexpected fatal signal %d.\n", @@ -1293,19 +1313,19 @@ EXPORT_SYMBOL(kill_pid); * These functions support sending signals using preallocated sigqueue * structures. This is needed "because realtime applications cannot * afford to lose notifications of asynchronous events, like timer - * expirations or I/O completions". In the case of Posix Timers + * expirations or I/O completions". In the case of Posix Timers * we allocate the sigqueue structure from the timer_create. If this * allocation fails we are able to report the failure to the application * with an EAGAIN error. */ - struct sigqueue *sigqueue_alloc(void) { - struct sigqueue *q; + struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); - if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0))) + if (q) q->flags |= SIGQUEUE_PREALLOC; - return(q); + + return q; } void sigqueue_free(struct sigqueue *q) -- cgit v0.10.2 From ca2b900f9af1586b9889ccc4b12e453c13268bd5 Mon Sep 17 00:00:00 2001 From: Zeev Tarantov Date: Mon, 9 Nov 2009 13:26:13 +0200 Subject: perf tools: Fix syntax in documentation Fix trivial syntax in perf-events user-space tools documentation. Signed-off-by: Zeev Tarantov Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <12d7e64c0911081811i7e5b466cu6706ff6ab3e70db4@mail.gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 59f0b84..9dccb18 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -24,11 +24,11 @@ OPTIONS --dsos=:: Only consider symbols in these dsos. CSV that understands file://filename entries. --n ---show-nr-samples +-n:: +--show-nr-samples:: Show the number of samples for each symbol --T ---threads +-T:: +--threads:: Show per-thread event counters -C:: --comms=:: -- cgit v0.10.2 From dd8dbf2e6880e30c00b18600c962d0cb5a03c555 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 3 Nov 2009 16:35:32 +1100 Subject: security: report the module name to security_module_request For SELinux to do better filtering in userspace we send the name of the module along with the AVC denial when a program is denied module_request. Example output: type=SYSCALL msg=audit(11/03/2009 10:59:43.510:9) : arch=x86_64 syscall=write success=yes exit=2 a0=3 a1=7fc28c0d56c0 a2=2 a3=7fffca0d7440 items=0 ppid=1727 pid=1729 auid=unset uid=root gid=root euid=root suid=root fsuid=root egid=root sgid=root fsgid=root tty=(none) ses=unset comm=rpc.nfsd exe=/usr/sbin/rpc.nfsd subj=system_u:system_r:nfsd_t:s0 key=(null) type=AVC msg=audit(11/03/2009 10:59:43.510:9) : avc: denied { module_request } for pid=1729 comm=rpc.nfsd kmod="net-pf-10" scontext=system_u:system_r:nfsd_t:s0 tcontext=system_u:system_r:kernel_t:s0 tclass=system Signed-off-by: Eric Paris Signed-off-by: James Morris diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 190c378..f78f83d 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -26,14 +26,15 @@ /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { - char type; -#define LSM_AUDIT_DATA_FS 1 -#define LSM_AUDIT_DATA_NET 2 -#define LSM_AUDIT_DATA_CAP 3 -#define LSM_AUDIT_DATA_IPC 4 -#define LSM_AUDIT_DATA_TASK 5 -#define LSM_AUDIT_DATA_KEY 6 -#define LSM_AUDIT_NO_AUDIT 7 + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 +#define LSM_AUDIT_NO_AUDIT 7 +#define LSM_AUDIT_DATA_KMOD 8 struct task_struct *tsk; union { struct { @@ -66,6 +67,7 @@ struct common_audit_data { char *key_desc; } key_struct; #endif + char *kmod_name; } u; /* this union contains LSM specific data */ union { diff --git a/include/linux/security.h b/include/linux/security.h index ed0faea..466cbad 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -706,6 +706,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. + * @kmod_name name of the module requested by the kernel * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity @@ -1577,7 +1578,7 @@ struct security_operations { void (*cred_transfer)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); - int (*kernel_module_request)(void); + int (*kernel_module_request)(char *kmod_name); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1842,7 +1843,7 @@ void security_commit_creds(struct cred *new, const struct cred *old); void security_transfer_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); -int security_kernel_module_request(void); +int security_kernel_module_request(char *kmod_name); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2407,7 +2408,7 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } -static inline int security_kernel_module_request(void) +static inline int security_kernel_module_request(char *kmod_name) { return 0; } diff --git a/kernel/kmod.c b/kernel/kmod.c index 9fcb53a..25b1031 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -80,16 +80,16 @@ int __request_module(bool wait, const char *fmt, ...) #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ static int kmod_loop_msg; - ret = security_kernel_module_request(); - if (ret) - return ret; - va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); if (ret >= MODULE_NAME_LEN) return -ENAMETOOLONG; + ret = security_kernel_module_request(module_name); + if (ret) + return ret; + /* If modprobe needs a service that is in a module, we get a recursive * loop. Limit the number of running kmod threads to max_threads/2 or * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method diff --git a/security/capability.c b/security/capability.c index 4f3ab47..5c700e1 100644 --- a/security/capability.c +++ b/security/capability.c @@ -421,7 +421,7 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } -static int cap_kernel_module_request(void) +static int cap_kernel_module_request(char *kmod_name) { return 0; } diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 3bb90b6..51bd0fd 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -354,6 +354,10 @@ static void dump_common_audit_data(struct audit_buffer *ab, } break; #endif + case LSM_AUDIT_DATA_KMOD: + audit_log_format(ab, " kmod="); + audit_log_untrustedstring(ab, a->u.kmod_name); + break; } /* switch (a->type) */ } diff --git a/security/security.c b/security/security.c index aad71b2..24e060b 100644 --- a/security/security.c +++ b/security/security.c @@ -764,9 +764,9 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode) return security_ops->kernel_create_files_as(new, inode); } -int security_kernel_module_request(void) +int security_kernel_module_request(char *kmod_name) { - return security_ops->kernel_module_request(); + return security_ops->kernel_module_request(kmod_name); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a29d661..c96d63e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3337,9 +3337,18 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } -static int selinux_kernel_module_request(void) +static int selinux_kernel_module_request(char *kmod_name) { - return task_has_system(current, SYSTEM__MODULE_REQUEST); + u32 sid; + struct common_audit_data ad; + + sid = task_sid(current); + + COMMON_AUDIT_DATA_INIT(&ad, KMOD); + ad.u.kmod_name = kmod_name; + + return avc_has_perm(sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, + SYSTEM__MODULE_REQUEST, &ad); } static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) -- cgit v0.10.2 From 281d150c5f8892f158747594ab49ce2823fd8b8c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Nov 2009 13:52:27 -0800 Subject: rcu: Prepare for synchronization fixes: clean up for non-NO_HZ handling of ->completed counter Impose a clear locking design on non-NO_HZ handling of the ->completed counter. This increases the distance between the RCU and the CPU-hotplug mechanisms. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: # .32.x LKML-Reference: <12571987491353-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 69f4efe..26249ab 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -178,9 +178,29 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) return &rsp->node[0]; } +/* + * Record the specified "completed" value, which is later used to validate + * dynticks counter manipulations and CPU-offline checks. Specify + * "rsp->completed - 1" to unconditionally invalidate any future dynticks + * manipulations and CPU-offline checks. Such invalidation is useful at + * the beginning of a grace period. + */ +static void dyntick_record_completed(struct rcu_state *rsp, long comp) +{ + rsp->dynticks_completed = comp; +} + #ifdef CONFIG_SMP /* + * Recall the previously recorded value of the completion for dynticks. + */ +static long dyntick_recall_completed(struct rcu_state *rsp) +{ + return rsp->dynticks_completed; +} + +/* * If the specified CPU is offline, tell the caller that it is in * a quiescent state. Otherwise, whack it with a reschedule IPI. * Grace periods can end up waiting on an offline CPU when that @@ -337,28 +357,9 @@ void rcu_irq_exit(void) set_need_resched(); } -/* - * Record the specified "completed" value, which is later used to validate - * dynticks counter manipulations. Specify "rsp->completed - 1" to - * unconditionally invalidate any future dynticks manipulations (which is - * useful at the beginning of a grace period). - */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ - rsp->dynticks_completed = comp; -} - #ifdef CONFIG_SMP /* - * Recall the previously recorded value of the completion for dynticks. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->dynticks_completed; -} - -/* * Snapshot the specified CPU's dynticks counter so that we can later * credit them with an implicit quiescent state. Return 1 if this CPU * is in dynticks idle mode, which is an extended quiescent state. @@ -421,24 +422,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #else /* #ifdef CONFIG_NO_HZ */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ -} - #ifdef CONFIG_SMP -/* - * If there are no dynticks, then the only way that a CPU can passively - * be in a quiescent state is to be offline. Unlike dynticks idle, which - * is a point in time during the prior (already finished) grace period, - * an offline CPU is always in a quiescent state, and thus can be - * unconditionally applied. So just return the current value of completed. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->completed; -} - static int dyntick_save_progress_counter(struct rcu_data *rdp) { return 0; @@ -1146,6 +1131,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) long lastcomp; struct rcu_node *rnp = rcu_get_root(rsp); u8 signaled; + u8 forcenow; if (!rcu_gp_in_progress(rsp)) return; /* No grace period in progress, nothing to force. */ @@ -1182,16 +1168,23 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) if (rcu_process_dyntick(rsp, lastcomp, dyntick_save_progress_counter)) goto unlock_ret; + /* fall into next case. */ + + case RCU_SAVE_COMPLETED: /* Update state, record completion counter. */ + forcenow = 0; spin_lock(&rnp->lock); if (lastcomp == rsp->completed && - rsp->signaled == RCU_SAVE_DYNTICK) { + rsp->signaled == signaled) { rsp->signaled = RCU_FORCE_QS; dyntick_record_completed(rsp, lastcomp); + forcenow = signaled == RCU_SAVE_COMPLETED; } spin_unlock(&rnp->lock); - break; + if (!forcenow) + break; + /* fall into next case. */ case RCU_FORCE_QS: diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 1899023..8a4c165 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -204,11 +204,12 @@ struct rcu_data { #define RCU_GP_IDLE 0 /* No grace period in progress. */ #define RCU_GP_INIT 1 /* Grace period being initialized. */ #define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */ -#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ +#define RCU_SAVE_COMPLETED 3 /* Need to save rsp->completed. */ +#define RCU_FORCE_QS 4 /* Need to force quiescent state. */ #ifdef CONFIG_NO_HZ #define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK #else /* #ifdef CONFIG_NO_HZ */ -#define RCU_SIGNAL_INIT RCU_FORCE_QS +#define RCU_SIGNAL_INIT RCU_SAVE_COMPLETED #endif /* #else #ifdef CONFIG_NO_HZ */ #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ @@ -274,9 +275,8 @@ struct rcu_state { unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#ifdef CONFIG_NO_HZ long dynticks_completed; /* Value of completed @ snap. */ -#endif /* #ifdef CONFIG_NO_HZ */ + /* Protected by fqslock. */ }; #ifdef RCU_TREE_NONCORE -- cgit v0.10.2 From d09b62dfa336447c52a5ec9bb88adbc479b0f3b8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Nov 2009 13:52:28 -0800 Subject: rcu: Fix synchronization for rcu_process_gp_end() uses of ->completed counter Impose a clear locking design on the rcu_process_gp_end() function's use of the ->completed counter. This is done by creating a ->completed field in the rcu_node structure, which can safely be accessed under the protection of that structure's lock. Performance and scalability are maintained by using a form of double-checked locking, so that rcu_process_gp_end() only acquires the leaf rcu_node structure's ->lock if a grace period has recently ended. This fix reduces rcutorture failure rate by at least two orders of magnitude under heavy stress with force_quiescent_state() being invoked artificially often. Without this fix, unsynchronized access to the ->completed field can cause rcu_process_gp_end() to advance callbacks whose grace period has not yet expired. (Bad idea!) Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: # .32.x LKML-Reference: <12571987494069-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 26249ab..9e068d1 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -570,6 +570,76 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) } /* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. In addition, the corresponding leaf rcu_node structure's + * ->lock must be held by the caller, with irqs disabled. + */ +static void +__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + /* Did another grace period end? */ + if (rdp->completed != rnp->completed) { + + /* Advance callbacks. No harm if list empty. */ + rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Remember that we saw this grace-period completion. */ + rdp->completed = rnp->completed; + } +} + +/* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. + */ +static void +rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + struct rcu_node *rnp; + + local_irq_save(flags); + rnp = rdp->mynode; + if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */ + !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + local_irq_restore(flags); + return; + } + __rcu_process_gp_end(rsp, rnp, rdp); + spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Do per-CPU grace-period initialization for running CPU. The caller + * must hold the lock of the leaf rcu_node structure corresponding to + * this CPU. + */ +static void +rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + /* Prior grace period ended, so advance callbacks for current CPU. */ + __rcu_process_gp_end(rsp, rnp, rdp); + + /* + * Because this CPU just now started the new grace period, we know + * that all of its callbacks will be covered by this upcoming grace + * period, even the ones that were registered arbitrarily recently. + * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. + * + * Other CPUs cannot be sure exactly when the grace period started. + * Therefore, their recently registered callbacks must pass through + * an additional RCU_NEXT_READY stage, so that they will be handled + * by the next RCU grace period. + */ + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; +} + +/* * Start a new RCU grace period if warranted, re-initializing the hierarchy * in preparation for detecting the next grace period. The caller must hold * the root node's ->lock, which is released before return. Hard irqs must @@ -596,26 +666,14 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) dyntick_record_completed(rsp, rsp->completed - 1); note_new_gpnum(rsp, rdp); - /* - * Because this CPU just now started the new grace period, we know - * that all of its callbacks will be covered by this upcoming grace - * period, even the ones that were registered arbitrarily recently. - * Therefore, advance all outstanding callbacks to RCU_WAIT_TAIL. - * - * Other CPUs cannot be sure exactly when the grace period started. - * Therefore, their recently registered callbacks must pass through - * an additional RCU_NEXT_READY stage, so that they will be handled - * by the next RCU grace period. - */ - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - /* Special-case the common single-level case. */ if (NUM_RCU_NODES == 1) { rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ + rcu_start_gp_per_cpu(rsp, rnp, rdp); spin_unlock_irqrestore(&rnp->lock, flags); return; } @@ -648,6 +706,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rcu_preempt_check_blocked_tasks(rnp); rnp->qsmask = rnp->qsmaskinit; rnp->gpnum = rsp->gpnum; + rnp->completed = rsp->completed; + if (rnp == rdp->mynode) + rcu_start_gp_per_cpu(rsp, rnp, rdp); spin_unlock(&rnp->lock); /* irqs remain disabled. */ } @@ -659,34 +720,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) } /* - * Advance this CPU's callbacks, but only if the current grace period - * has ended. This may be called only from the CPU to whom the rdp - * belongs. - */ -static void -rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) -{ - long completed_snap; - unsigned long flags; - - local_irq_save(flags); - completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */ - - /* Did another grace period end? */ - if (rdp->completed != completed_snap) { - - /* Advance callbacks. No harm if list empty. */ - rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; - rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; - rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - - /* Remember that we saw this grace-period completion. */ - rdp->completed = completed_snap; - } - local_irq_restore(flags); -} - -/* * Clean up after the prior grace period and let rcu_start_gp() start up * the next grace period if one is needed. Note that the caller must * hold rnp->lock, as required by rcu_start_gp(), which will release it. @@ -697,7 +730,6 @@ static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); rsp->completed = rsp->gpnum; rsp->signaled = RCU_GP_IDLE; - rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ } @@ -1539,21 +1571,16 @@ static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) { unsigned long flags; - long lastcomp; unsigned long mask; struct rcu_data *rdp = rsp->rda[cpu]; struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ spin_lock_irqsave(&rnp->lock, flags); - lastcomp = rsp->completed; - rdp->completed = lastcomp; - rdp->gpnum = lastcomp; rdp->passed_quiesc = 0; /* We could be racing with new GP, */ rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ rdp->preemptable = preemptable; - rdp->passed_quiesc_completed = lastcomp - 1; rdp->qlen_last_fqs_check = 0; rdp->n_force_qs_snap = rsp->n_force_qs; rdp->blimit = blimit; @@ -1575,6 +1602,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit |= mask; mask = rnp->grpmask; + if (rnp == rdp->mynode) { + rdp->gpnum = rnp->completed; /* if GP in progress... */ + rdp->completed = rnp->completed; + rdp->passed_quiesc_completed = rnp->completed - 1; + } spin_unlock(&rnp->lock); /* irqs already disabled. */ rnp = rnp->parent; } while (rnp != NULL && !(rnp->qsmaskinit & mask)); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 8a4c165..c1891c3 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -84,6 +84,9 @@ struct rcu_node { long gpnum; /* Current grace period for this node. */ /* This will either be equal to or one */ /* behind the root rcu_node's gpnum. */ + long completed; /* Last grace period completed for this node. */ + /* This will either be equal to or one */ + /* behind the root rcu_node's gpnum. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ /* In leaf rcu_node, each bit corresponds to */ -- cgit v0.10.2 From 9160306e6f5b68bb64630c9031c517ca1cf463db Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Nov 2009 13:52:29 -0800 Subject: rcu: Fix note_new_gpnum() uses of ->gpnum Impose a clear locking design on the note_new_gpnum() function's use of the ->gpnum counter. This is done by updating rdp->gpnum only from the corresponding leaf rcu_node structure's rnp->gpnum field, and even then only under the protection of that same rcu_node structure's ->lock field. Performance and scalability are maintained using a form of double-checked locking, and excessive spinning is avoided by use of the spin_trylock() function. The use of spin_trylock() is safe due to the fact that CPUs who fail to acquire this lock will try again later. The hierarchical nature of the rcu_node data structure limits contention (which could be limited further if need be using the RCU_FANOUT kernel parameter). Without this patch, obscure but quite possible races could result in a quiescent state that occurred during one grace period to be accounted to the following grace period, causing this following grace period to end prematurely. Not good! Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: # .32.x LKML-Reference: <12571987492350-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 9e068d1..ec007dd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -540,13 +540,33 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) /* * Update CPU-local rcu_data state to record the newly noticed grace period. * This is used both when we started the grace period and when we notice - * that someone else started the grace period. + * that someone else started the grace period. The caller must hold the + * ->lock of the leaf rcu_node structure corresponding to the current CPU, + * and must have irqs disabled. */ +static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) +{ + if (rdp->gpnum != rnp->gpnum) { + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; + rdp->gpnum = rnp->gpnum; + } +} + static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) { - rdp->qs_pending = 1; - rdp->passed_quiesc = 0; - rdp->gpnum = rsp->gpnum; + unsigned long flags; + struct rcu_node *rnp; + + local_irq_save(flags); + rnp = rdp->mynode; + if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */ + !spin_trylock(&rnp->lock)) { /* irqs already off, retry later. */ + local_irq_restore(flags); + return; + } + __note_new_gpnum(rsp, rnp, rdp); + spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -637,6 +657,9 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat */ rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Set state so that this CPU will detect the next quiescent state. */ + __note_new_gpnum(rsp, rnp, rdp); } /* @@ -664,7 +687,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); dyntick_record_completed(rsp, rsp->completed - 1); - note_new_gpnum(rsp, rdp); /* Special-case the common single-level case. */ if (NUM_RCU_NODES == 1) { -- cgit v0.10.2 From eae0c9dfb534cb3449888b9601228efa6480fdb5 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 10 Nov 2009 03:50:02 +0100 Subject: sched: Fix and clean up rate-limit newidle code Commit 1b9508f, "Rate-limit newidle" has been confirmed to fix the netperf UDP loopback regression reported by Alex Shi. This is a cleanup and a fix: - moved to a more out of the way spot - fix to ensure that balancing doesn't try to balance runqueues which haven't gone online yet, which can mess up CPU enumeration during boot. Reported-by: Alex Shi Reported-by: Zhang, Yanmin Signed-off-by: Mike Galbraith Acked-by: Peter Zijlstra Cc: # .32.x: a1f84a3: sched: Check for an idle shared cache Cc: # .32.x: 1b9508f: sched: Rate-limit newidle Cc: # .32.x: fd21073: sched: Fix affinity logic Cc: # .32.x LKML-Reference: <1257821402.5648.17.camel@marge.simson.net> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 23e3535..ad37776 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2354,17 +2354,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, if (rq != orig_rq) update_rq_clock(rq); - if (rq->idle_stamp) { - u64 delta = rq->clock - rq->idle_stamp; - u64 max = 2*sysctl_sched_migration_cost; - - if (delta > max) - rq->avg_idle = max; - else - update_avg(&rq->avg_idle, delta); - rq->idle_stamp = 0; - } - WARN_ON(p->state != TASK_WAKING); cpu = task_cpu(p); @@ -2421,6 +2410,17 @@ out_running: #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) p->sched_class->task_wake_up(rq, p); + + if (unlikely(rq->idle_stamp)) { + u64 delta = rq->clock - rq->idle_stamp; + u64 max = 2*sysctl_sched_migration_cost; + + if (delta > max) + rq->avg_idle = max; + else + update_avg(&rq->avg_idle, delta); + rq->idle_stamp = 0; + } #endif out: task_rq_unlock(rq, &flags); @@ -4098,7 +4098,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, unsigned long flags; struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); - cpumask_setall(cpus); + cpumask_copy(cpus, cpu_online_mask); /* * When power savings policy is enabled for the parent domain, idle @@ -4261,7 +4261,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) int all_pinned = 0; struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); - cpumask_setall(cpus); + cpumask_copy(cpus, cpu_online_mask); /* * When power savings policy is enabled for the parent domain, idle @@ -9522,6 +9522,8 @@ void __init sched_init(void) rq->cpu = i; rq->online = 0; rq->migration_thread = NULL; + rq->idle_stamp = 0; + rq->avg_idle = 2*sysctl_sched_migration_cost; INIT_LIST_HEAD(&rq->migration_queue); rq_attach_root(rq, &def_root_domain); #endif -- cgit v0.10.2 From 242aa14a67f4e19453fc8a51cffc5ac5ee5bcbd1 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 10 Nov 2009 08:19:59 +0900 Subject: perf bench: Add format constants to bench.h for unified output formatting This patch adds some constants and extern declaration to bench.h. These are used for unified output formatting of 'perf bench'. Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257808802-9420-2-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 59adb27..42167ea 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -6,4 +6,13 @@ extern int bench_sched_messaging(int argc, const char **argv, extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); +#define BENCH_FORMAT_DEFAULT_STR "default" +#define BENCH_FORMAT_DEFAULT 0 +#define BENCH_FORMAT_SIMPLE_STR "simple" +#define BENCH_FORMAT_SIMPLE 1 + +#define BENCH_FORMAT_UNKNOWN -1 + +extern int bench_format; + #endif -- cgit v0.10.2 From 386d7e9e542c2115d5d300747e57f503458a1617 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 10 Nov 2009 08:20:00 +0900 Subject: perf bench: Modify builtin-bench.c for processing common options This patch modifies builtin-bench.c for processing common options. The first option added is "--format". Users of perf bench will be able to specify output style by --format. Usage example: % ./perf bench sched messaging # with no style specify (20 sender and receiver processes per group) (10 groups == 400 processes run) Total time:1.431 sec % ./perf bench --format=simple sched messaging # specified simple 1.431 Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257808802-9420-3-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 31f4164..c7505ea 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -74,53 +74,104 @@ static void dump_suites(int subsys_index) return; } +static char *bench_format_str; +int bench_format = BENCH_FORMAT_DEFAULT; + +static const struct option bench_options[] = { + OPT_STRING('f', "format", &bench_format_str, "default", + "Specify format style"), + OPT_END() +}; + +static const char * const bench_usage[] = { + "perf bench [] []", + NULL +}; + +static void print_usage(void) +{ + int i; + + printf("Usage: \n"); + for (i = 0; bench_usage[i]; i++) + printf("\t%s\n", bench_usage[i]); + printf("\n"); + + printf("List of available subsystems...\n\n"); + + for (i = 0; subsystems[i].name; i++) + printf("\t%s: %s\n", + subsystems[i].name, subsystems[i].summary); + printf("\n"); +} + +static int bench_str2int(char *str) +{ + if (!str) + return BENCH_FORMAT_DEFAULT; + + if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR)) + return BENCH_FORMAT_DEFAULT; + else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR)) + return BENCH_FORMAT_SIMPLE; + + return BENCH_FORMAT_UNKNOWN; +} + int cmd_bench(int argc, const char **argv, const char *prefix __used) { int i, j, status = 0; if (argc < 2) { /* No subsystem specified. */ - printf("Usage: perf bench []\n\n"); - printf("List of available subsystems...\n\n"); + print_usage(); + goto end; + } - for (i = 0; subsystems[i].name; i++) - printf("\t%s: %s\n", - subsystems[i].name, subsystems[i].summary); - printf("\n"); + argc = parse_options(argc, argv, bench_options, bench_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + bench_format = bench_str2int(bench_format_str); + if (bench_format == BENCH_FORMAT_UNKNOWN) { + printf("Unknown format descriptor:%s\n", bench_format_str); + goto end; + } + if (argc < 1) { + print_usage(); goto end; } for (i = 0; subsystems[i].name; i++) { - if (strcmp(subsystems[i].name, argv[1])) + if (strcmp(subsystems[i].name, argv[0])) continue; - if (argc < 3) { + if (argc < 2) { /* No suite specified. */ dump_suites(i); goto end; } for (j = 0; subsystems[i].suites[j].name; j++) { - if (strcmp(subsystems[i].suites[j].name, argv[2])) + if (strcmp(subsystems[i].suites[j].name, argv[1])) continue; - status = subsystems[i].suites[j].fn(argc - 2, - argv + 2, prefix); + status = subsystems[i].suites[j].fn(argc - 1, + argv + 1, prefix); goto end; } - if (!strcmp(argv[2], "-h") || !strcmp(argv[2], "--help")) { + if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { dump_suites(i); goto end; } - printf("Unknown suite:%s for %s\n", argv[2], argv[1]); + printf("Unknown suite:%s for %s\n", argv[1], argv[0]); status = 1; goto end; } - printf("Unknown subsystem:%s\n", argv[1]); + printf("Unknown subsystem:%s\n", argv[0]); status = 1; end: -- cgit v0.10.2 From cced06c62a9db6bd6d77e3f0a57dbe47a26d881e Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 10 Nov 2009 08:20:01 +0900 Subject: perf bench: Modify bench/bench-messaging.c to adopt unified output formatting This patch modifies bench/bench-messaging.c to adopt unified output formatting: --format option. Usage example: % ./perf bench sched messaging # with no style specify (20 sender and receiver processes per group) (10 groups == 400 processes run) Total time:1.431 sec % ./perf bench --format=simple sched messaging # specified simple 1.431 Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257808802-9420-4-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 36b62c5..2cc5edc 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -35,7 +35,6 @@ static int use_pipes = 0; static unsigned int loops = 100; static unsigned int thread_mode = 0; static unsigned int num_groups = 10; -static int simple = 0; struct sender_context { unsigned int num_fds; @@ -261,9 +260,6 @@ static const struct option options[] = { "Specify number of groups"), OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), - OPT_BOOLEAN('s', "simple-output", &simple, - "Do simple output (this maybe useful for" - "processing by scripts or graph tools like gnuplot)"), OPT_END() }; @@ -316,9 +312,8 @@ int bench_sched_messaging(int argc, const char **argv, timersub(&stop, &start, &diff); - if (simple) - printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000); - else { + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: printf("(%d sender and receiver %s per group)\n", num_fds, thread_mode ? "threads" : "processes"); printf("(%d groups == %d %s run)\n\n", @@ -326,6 +321,15 @@ int bench_sched_messaging(int argc, const char **argv, thread_mode ? "threads" : "processes"); printf("\tTotal time:%lu.%03lu sec\n", diff.tv_sec, diff.tv_usec/1000); + break; + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", diff.tv_sec, diff.tv_usec/1000); + break; + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; } return 0; -- cgit v0.10.2 From 158ba827f6deef4102c5247ed4b6a587f0bd6a07 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 10 Nov 2009 08:20:02 +0900 Subject: perf bench: Modify builtin-pipe.c for processing common options This patch modifies builtin-pipe.c for processing common options. The first option added is "--format". Users of perf bench will be able to specify output style by --format. Usage example: % ./perf bench sched pipe # with no style specify (executing 1000000 pipe operations between two tasks) Total time:5.855 sec 5.855061 usecs/op 170792 ops/sec % ./perf bench --format=simple sched pipe # specified simple 5.988 Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257808802-9420-5-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Paul Mackerras diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 6a29100..a9ac186 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -30,14 +30,10 @@ #define LOOPS_DEFAULT 1000000 static int loops = LOOPS_DEFAULT; -static int simple = 0; static const struct option options[] = { OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), - OPT_BOOLEAN('s', "simple-output", &simple, - "Do simple output (this maybe useful for" - "processing by scripts or graph tools like gnuplot)"), OPT_END() }; @@ -94,10 +90,8 @@ int bench_sched_pipe(int argc, const char **argv, return 0; } - if (simple) - printf("%lu.%03lu\n", - diff.tv_sec, diff.tv_usec / 1000); - else { + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: printf("(executing %d pipe operations between two tasks)\n\n", loops); @@ -111,6 +105,18 @@ int bench_sched_pipe(int argc, const char **argv, printf("\t\t%d ops/sec\n", (int)((double)loops / ((double)result_usec / (double)1000000))); + break; + + case BENCH_FORMAT_SIMPLE: + printf("%lu.%03lu\n", + diff.tv_sec, diff.tv_usec / 1000); + break; + + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; } return 0; -- cgit v0.10.2 From 7abc07531383ac7f727cc9d44e1360a829f2082e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 10 Nov 2009 01:06:59 +0300 Subject: x86: apic: Do not use stacked physid_mask_t We should not use physid_mask_t as a stack based variable in apic code. This type depends on MAX_APICS parameter which may be huge enough. Especially it became a problem with apic NOOP driver which is portable between 32 bit and 64 bit environment (where we have really huge MAX_APICS). So apic driver should operate with pointers and a caller in turn should aware of allocation physid_mask_t variable. As a side (but positive) effect -- we may use already implemented physid_set_mask_of_physid function eliminating default_apicid_to_cpu_present completely. Note that physids_coerce and physids_promote turned into static inline from macro (since macro hides the fact that parameter is being interpreted as unsigned long, make it explicit). Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki Cc: Stephen Rothwell LKML-Reference: <20091109220659.GA5568@lenovo> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 08a5f42..b4ac2cd 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -297,20 +297,20 @@ struct apic { int disable_esr; int dest_logical; - unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); + unsigned long (*check_apicid_used)(physid_mask_t *map, int apicid); unsigned long (*check_apicid_present)(int apicid); void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); void (*init_apic_ldr)(void); - physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); + void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); void (*setup_apic_routing)(void); int (*multi_timer_check)(int apic, int irq); int (*apicid_to_node)(int logical_apicid); int (*cpu_to_logical_apicid)(int cpu); int (*cpu_present_to_apicid)(int mps_cpu); - physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); + void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); void (*setup_portio_remap)(void); int (*check_phys_apicid_present)(int phys_apicid); void (*enable_apic_mode)(void); @@ -534,9 +534,9 @@ default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, return (unsigned int)(mask1 & mask2 & mask3); } -static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) +static inline unsigned long default_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long default_check_apicid_present(int bit) @@ -544,9 +544,9 @@ static inline unsigned long default_check_apicid_present(int bit) return physid_isset(bit, phys_cpu_present_map); } -static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { - return phys_map; + *retmap = *phys_map; } /* Mapping from cpu number to logical apicid */ @@ -585,11 +585,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); #endif -static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - #endif /* CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 79c9450..61d90b1 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -163,14 +163,16 @@ typedef struct physid_mask physid_mask_t; #define physids_shift_left(d, s, n) \ bitmap_shift_left((d).mask, (s).mask, n, MAX_APICS) -#define physids_coerce(map) ((map).mask[0]) +static inline unsigned long physids_coerce(physid_mask_t *map) +{ + return map->mask[0]; +} -#define physids_promote(physids) \ - ({ \ - physid_mask_t __physid_mask = PHYSID_MASK_NONE; \ - __physid_mask.mask[0] = physids; \ - __physid_mask; \ - }) +static inline void physids_promote(unsigned long physids, physid_mask_t *map) +{ + physids_clear(*map); + map->mask[0] = physids; +} /* Note: will create very large stack frames if physid_mask_t is big */ #define physid_mask_of_physid(physid) \ diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 89629f6..d9acc3b 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void) return 0; } -static physid_mask_t noop_ioapic_phys_id_map(physid_mask_t phys_map) -{ - return phys_map; -} - static int noop_cpu_to_logical_apicid(int cpu) { return 0; @@ -100,9 +95,9 @@ static const struct cpumask *noop_target_cpus(void) return cpumask_of(0); } -static unsigned long noop_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static unsigned long noop_check_apicid_present(int bit) @@ -155,19 +150,14 @@ struct apic apic_noop = { .vector_allocation_domain = noop_vector_allocation_domain, .init_apic_ldr = noop_init_apic_ldr, - .ioapic_phys_id_map = noop_ioapic_phys_id_map, + .ioapic_phys_id_map = default_ioapic_phys_id_map, .setup_apic_routing = NULL, .multi_timer_check = NULL, .apicid_to_node = noop_apicid_to_node, .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - -#ifdef CONFIG_X86_32 - .apicid_to_cpu_present = default_apicid_to_cpu_present, -#else - .apicid_to_cpu_present = NULL, -#endif + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 77a0641..38dcecf 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void) #endif } -static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) -{ - return physid_mask_of_physid(phys_apicid); -} - /* Mapping from cpu number to logical apicid */ static inline int bigsmp_cpu_to_logical_apicid(int cpu) { @@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) return cpu_physical_id(cpu); } -static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) +static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xFFL); + physids_promote(0xFFL, retmap); } static int bigsmp_check_phys_apicid_present(int phys_apicid) @@ -230,7 +225,7 @@ struct apic apic_bigsmp = { .apicid_to_node = bigsmp_apicid_to_node, .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, - .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = bigsmp_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 89174f8..e85f8fb 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -466,11 +466,11 @@ static const struct cpumask *es7000_target_cpus(void) return cpumask_of(smp_processor_id()); } -static unsigned long -es7000_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } + static unsigned long es7000_check_apicid_present(int bit) { return physid_isset(bit, phys_cpu_present_map); @@ -539,14 +539,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) static int cpu_id; -static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) +static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) { - physid_mask_t mask; - - mask = physid_mask_of_physid(cpu_id); + physid_set_mask_of_physid(cpu_id, retmap); ++cpu_id; - - return mask; } /* Mapping from cpu number to logical apicid */ @@ -561,10 +557,10 @@ static int es7000_cpu_to_logical_apicid(int cpu) #endif } -static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) +static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0xff); + physids_promote(0xFFL, retmap); } static int es7000_check_phys_apicid_present(int cpu_physical_apicid) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 24d1458..20ea839 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2031,7 +2031,7 @@ void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -2058,7 +2058,7 @@ void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(&phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -2073,7 +2073,7 @@ void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -3904,7 +3904,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -3920,10 +3920,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(&apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!apic->check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(&apic_id_map, i)) break; } @@ -3936,7 +3936,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apic->apicid_to_cpu_present(apic_id); + apic->apicid_to_cpu_present(apic_id, &tmp); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2..07cdbdc 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c @@ -334,10 +334,9 @@ static inline const struct cpumask *numaq_target_cpus(void) return cpu_all_mask; } -static inline unsigned long -numaq_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) { - return physid_isset(apicid, bitmap); + return physid_isset(apicid, *map); } static inline unsigned long numaq_check_apicid_present(int bit) @@ -371,10 +370,10 @@ static inline int numaq_multi_timer_check(int apic, int irq) return apic != 0 && irq == 0; } -static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) +static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) { /* We don't have a good way to do this yet - hack */ - return physids_promote(0xFUL); + return physids_promote(0xFUL, retmap); } static inline int numaq_cpu_to_logical_apicid(int cpu) @@ -402,12 +401,12 @@ static inline int numaq_apicid_to_node(int logical_apicid) return logical_apicid >> 4; } -static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) +static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) { int node = numaq_apicid_to_node(logical_apicid); int cpu = __ffs(logical_apicid & 0xf); - return physid_mask_of_physid(cpu + 4*node); + physid_set_mask_of_physid(cpu + 4*node, retmap); } /* Where the IO area was mapped on multiquad, always 0 otherwise */ diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0c0182c..1a6559f 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -108,7 +108,7 @@ struct apic apic_default = { .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .apicid_to_cpu_present = default_apicid_to_cpu_present, + .apicid_to_cpu_present = physid_set_mask_of_physid, .setup_portio_remap = NULL, .check_phys_apicid_present = default_check_phys_apicid_present, .enable_apic_mode = NULL, diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 645ecc4..9b41926 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c @@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void) return cpumask_of(0); } -static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) +static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) { return 0; } @@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu) return BAD_APICID; } -static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) +static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) { /* For clustered we don't have a good way to do this yet - hack */ - return physids_promote(0x0F); + physids_promote(0x0FL, retmap); } -static physid_mask_t summit_apicid_to_cpu_present(int apicid) +static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) { - return physid_mask_of_physid(0); + physid_set_mask_of_physid(0, retmap); } static int summit_check_phys_apicid_present(int physical_apicid) diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553..cff70c8 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -183,7 +183,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) return; } - apic_cpus = apic->apicid_to_cpu_present(m->apicid); + apic->apicid_to_cpu_present(m->apicid, &apic_cpus); physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); /* * Validate version -- cgit v0.10.2 From a2202aa29289db64ca7988b12343158b67b27f10 Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Tue, 10 Nov 2009 09:38:24 +0800 Subject: x86: Under BIOS control, restore AP's APIC_LVTTHMR to the BSP value On platforms where the BIOS handles the thermal monitor interrupt, APIC_LVTTHMR on each logical CPU is programmed to generate a SMI and OS must not touch it. Unfortunately AP bringup sequence using INIT-SIPI-SIPI clears all the LVT entries except the mask bit. Essentially this results in all LVT entries including the thermal monitoring interrupt set to masked (clearing the bios programmed value for APIC_LVTTHMR). And this leads to kernel take over the thermal monitoring interrupt on AP's but not on BSP (leaving the bios programmed value only on BSP). As a result of this, we have seen system hangs when the thermal monitoring interrupt is generated. Fix this by reading the initial value of thermal LVT entry on BSP and if bios has taken over the control, then program the same value on all AP's and leave the thermal monitoring interrupt control on all the logical cpu's to the bios. Signed-off-by: Yong Wang Reviewed-by: Suresh Siddha Cc: Borislav Petkov Cc: Arjan van de Ven LKML-Reference: <20091110013824.GA24940@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar Cc: stable@kernel.org diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 161485d..858baa0 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -120,8 +120,10 @@ extern int mce_disabled; extern int mce_p5_enabled; #ifdef CONFIG_X86_MCE +int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); #else +static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} #endif @@ -215,5 +217,12 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); void intel_init_thermal(struct cpuinfo_x86 *c); void mce_log_therm_throt_event(__u64 status); + +#ifdef CONFIG_X86_THERMAL_VECTOR +extern void mcheck_intel_therm_init(void); +#else +static inline void mcheck_intel_therm_init(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4df69a3..9053be5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -837,10 +837,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; } -#ifdef CONFIG_X86_MCE /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); -#endif select_idle_routine(c); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8080170..0d41020 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1655,13 +1655,14 @@ static int __init mcheck_enable(char *str) } __setup("mce", mcheck_enable); -static int __init mcheck_init(void) +int __init mcheck_init(void) { atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); + mcheck_intel_therm_init(); + return 0; } -early_initcall(mcheck_init); /* * Sysfs support diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba..7f3cf36 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); static atomic_t therm_throt_en = ATOMIC_INIT(0); +static u32 lvtthmr_init __read_mostly; + #ifdef CONFIG_SYSFS #define define_therm_throt_sysdev_one_ro(_name) \ static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) @@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } +void mcheck_intel_therm_init(void) +{ + /* + * This function is only called on boot CPU. Save the init thermal + * LVT value on BSP and use that value to restore APs' thermal LVT + * entry BIOS programmed later + */ + if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && + cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) + lvtthmr_init = apic_read(APIC_LVTTHMR); +} + void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); @@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) * since it might be delivered via SMI already: */ rdmsr(MSR_IA32_MISC_ENABLE, l, h); - h = apic_read(APIC_LVTTHMR); + + /* + * The initial value of thermal LVT entries on all APs always reads + * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI + * sequence to them and LVT registers are reset to 0s except for + * the mask bits which are set to 1s when APs receive INIT IPI. + * Always restore the value that BIOS has programmed on AP based on + * BSP's info we saved since BIOS is always setting the same value + * for all threads/cores + */ + apic_write(APIC_LVTTHMR, lvtthmr_init); + + h = lvtthmr_init; + if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2..179c1f2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -109,6 +109,7 @@ #ifdef CONFIG_X86_64 #include #endif +#include /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -1024,6 +1025,8 @@ void __init setup_arch(char **cmdline_p) #endif #endif x86_init.oem.banner(); + + mcheck_init(); } #ifdef CONFIG_X86_32 -- cgit v0.10.2 From 41855b77547fa18d90ed6a5d322983d3fdab1959 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 9 Nov 2009 17:58:50 -0800 Subject: x86: GART: pci-gart_64.c: Use correct length in strncmp Signed-off-by: Joe Perches Cc: # .3x.x LKML-Reference: <1257818330.12852.72.camel@Joe-Laptop.home> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a9bcdf7..eb46ab3 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -858,7 +858,7 @@ void __init gart_parse_options(char *p) #endif if (isdigit(*p) && get_option(&p, &arg)) iommu_size = arg; - if (!strncmp(p, "fullflush", 8)) + if (!strncmp(p, "fullflush", 9)) iommu_fullflush = 1; if (!strncmp(p, "nofullflush", 11)) iommu_fullflush = 0; -- cgit v0.10.2 From 676c0dbe6e514fdd8e434a9e623c781aa9b40b15 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 9 Nov 2009 17:37:34 +0900 Subject: ksym_tracer: Support read accesses independent of read/write. All of the infrastructure already exists to support read accesses for platforms that support a read access independently of read/write (such as in the case of the SuperH UBC). This just trivially hooks up the read case by itself. Signed-off-by: Paul Mundt Cc: Ingo Molnar Cc: Li Zefan Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Jan Kiszka Cc: Jiri Slaby Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Arjan van de Ven LKML-Reference: <20091109083733.GA25848@linux-sh.org> Signed-off-by: Frederic Weisbecker diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index fea83ee..11935b5 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -136,6 +136,7 @@ static int ksym_trace_get_access_type(char *str) access |= HW_BREAKPOINT_X; switch (access) { + case HW_BREAKPOINT_R: case HW_BREAKPOINT_W: case HW_BREAKPOINT_W | HW_BREAKPOINT_R: return access; @@ -239,7 +240,9 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); - if (entry->type == HW_BREAKPOINT_W) + if (entry->type == HW_BREAKPOINT_R) + ret = trace_seq_puts(s, "r--\n"); + else if (entry->type == HW_BREAKPOINT_W) ret = trace_seq_puts(s, "-w-\n"); else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) ret = trace_seq_puts(s, "rw-\n"); @@ -414,6 +417,9 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) return TRACE_TYPE_PARTIAL_LINE; switch (field->type) { + case HW_BREAKPOINT_R: + ret = trace_seq_printf(s, " R "); + break; case HW_BREAKPOINT_W: ret = trace_seq_printf(s, " W "); break; @@ -488,6 +494,9 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) access_type = entry->type; switch (access_type) { + case HW_BREAKPOINT_R: + seq_puts(m, " R "); + break; case HW_BREAKPOINT_W: seq_puts(m, " W "); break; -- cgit v0.10.2 From 9f6b3c2c30cfbb1166ce7e74a8f9fd93ae19d2de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 9 Nov 2009 21:03:43 +0100 Subject: hw-breakpoints: Fix broken a.out format dump Fix the broken a.out format dump. For now we only dump the ptrace breakpoints. TODO: Dump every perf breakpoints for the current thread, not only ptrace based ones. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: "K. Prasad" diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index fc4685d..7a15588 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,7 @@ #include #include +#include /* * fill in the user structure for an a.out core dump @@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg[0]; - dump->u_debugreg[1] = current->thread.debugreg[1]; - dump->u_debugreg[2] = current->thread.debugreg[2]; - dump->u_debugreg[3] = current->thread.debugreg[3]; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; + aout_dump_debugregs(dump); if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9a3333c..f1b673f 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -89,6 +89,8 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +extern void aout_dump_debugregs(struct user *dump); + #ifdef CONFIG_KVM extern void hw_breakpoint_restore(void); #endif diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index e622620..57dcee5f 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -376,6 +376,41 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, } /* + * Dump the debug register contents to the user. + * We can't dump our per cpu values because it + * may contain cpu wide breakpoint, something that + * doesn't belong to the current task. + * + * TODO: include non-ptrace user breakpoints (perf) + */ +void aout_dump_debugregs(struct user *dump) +{ + int i; + int dr7 = 0; + struct perf_event *bp; + struct arch_hw_breakpoint *info; + struct thread_struct *thread = ¤t->thread; + + for (i = 0; i < HBP_NUM; i++) { + bp = thread->ptrace_bps[i]; + + if (bp && !bp->attr.disabled) { + dump->u_debugreg[i] = bp->attr.bp_addr; + info = counter_arch_bp(bp); + dr7 |= encode_dr7(i, info->len, info->type); + } else { + dump->u_debugreg[i] = 0; + } + } + + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + + dump->u_debugreg[7] = dr7; +} + +/* * Release the user breakpoints used by ptrace */ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) -- cgit v0.10.2 From f60d24d2ad04977b0bd9e3eb35dba2d2fa569af9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Nov 2009 10:17:07 +0100 Subject: hw-breakpoints: Fix broken hw-breakpoint sample module The hw-breakpoint sample module has been broken during the hw-breakpoint internals refactoring. Propagate the changes to it. Reported-by: "K. Prasad" Signed-off-by: Frederic Weisbecker diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index e662dc9..9ea9414 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -454,6 +454,7 @@ fail: /* return the error if any */ return ERR_PTR(err); } +EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); /** * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel @@ -470,7 +471,7 @@ void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) } free_percpu(cpu_events); } - +EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); static struct notifier_block hw_breakpoint_exceptions_nb = { .notifier_call = hw_breakpoint_exceptions_notify, diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 8b6b8b6..8e5288a 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name) } return module_kallsyms_lookup_name(name); } +EXPORT_SYMBOL_GPL(kallsyms_lookup_name); int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 9cbdbb8..5bc9819 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -27,18 +27,19 @@ #include /* Needed by all modules */ #include /* Needed for KERN_INFO */ #include /* Needed for the macros */ +#include -#include +#include +#include -struct hw_breakpoint sample_hbp; +struct perf_event **sample_hbp; static char ksym_name[KSYM_NAME_LEN] = "pid_max"; module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" " write operations on the kernel symbol"); -void sample_hbp_handler(struct hw_breakpoint *temp, struct pt_regs - *temp_regs) +static void sample_hbp_handler(struct perf_event *temp, void *data) { printk(KERN_INFO "%s value is changed\n", ksym_name); dump_stack(); @@ -48,30 +49,34 @@ void sample_hbp_handler(struct hw_breakpoint *temp, struct pt_regs static int __init hw_break_module_init(void) { int ret; + unsigned long addr; -#ifdef CONFIG_X86 - sample_hbp.info.name = ksym_name; - sample_hbp.info.type = HW_BREAKPOINT_WRITE; - sample_hbp.info.len = HW_BREAKPOINT_LEN_4; -#endif /* CONFIG_X86 */ + addr = kallsyms_lookup_name(ksym_name); - sample_hbp.triggered = (void *)sample_hbp_handler; + sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4, + HW_BREAKPOINT_W | HW_BREAKPOINT_R, + sample_hbp_handler, true); + if (IS_ERR(sample_hbp)) { + ret = PTR_ERR(sample_hbp); + goto fail; + } else if (!sample_hbp) { + ret = -EINVAL; + goto fail; + } - ret = register_kernel_hw_breakpoint(&sample_hbp); - - if (ret < 0) { - printk(KERN_INFO "Breakpoint registration failed\n"); - return ret; - } else - printk(KERN_INFO "HW Breakpoint for %s write installed\n", - ksym_name); + printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name); return 0; + +fail: + printk(KERN_INFO "Breakpoint registration failed\n"); + + return ret; } static void __exit hw_break_module_exit(void) { - unregister_kernel_hw_breakpoint(&sample_hbp); + unregister_wide_hw_breakpoint(sample_hbp); printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); } -- cgit v0.10.2 From 59d8eb53ea9947db7cad8ebc31b0fb54f23a9851 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Nov 2009 11:03:12 +0100 Subject: hw-breakpoints: Wrap in the KVM breakpoint active state check Wrap in the cpu dr7 check that tells if we have active breakpoints that need to be restored in the cpu. This wrapper makes the check more self-explainable and also reusable for any further other uses. Reported-by: Jan Kiszka Signed-off-by: Frederic Weisbecker Cc: Avi Kivity Cc: "K. Prasad" diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index f1b673f..0f6e92a 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -89,6 +89,11 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +static inline int hw_breakpoint_active(void) +{ + return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; +} + extern void aout_dump_debugregs(struct user *dump); #ifdef CONFIG_KVM diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 22dee7a..3817220 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3651,7 +3651,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) * care about the messed up debug address registers. But if * we have some of them active, restore the old state. */ - if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK) + if (hw_breakpoint_active()) hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); -- cgit v0.10.2 From d07c1be0693e0902d743160b8b638585b808f8ac Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:12 +0900 Subject: x86: Add iommu_init to x86_init_ops We call the detections functions of all the IOMMUs then all their initialization functions. The latter is pointless since we don't detect multiple different IOMMUs. What we need to do is calling the initialization function of the detected IOMMU. This adds iommu_init hook to x86_init_ops so if an IOMMU detection function can set its initialization function to the hook. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 66008ed..d8e7145 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -91,6 +91,14 @@ struct x86_init_timers { }; /** + * struct x86_init_iommu - platform specific iommu setup + * @iommu_init: platform specific iommu setup + */ +struct x86_init_iommu { + int (*iommu_init)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -101,6 +109,7 @@ struct x86_init_ops { struct x86_init_oem oem; struct x86_init_paging paging; struct x86_init_timers timers; + struct x86_init_iommu iommu; }; /** diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 839d49a..a13478d 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -292,6 +292,8 @@ static int __init pci_iommu_init(void) dma_debug_add_bus(&pci_bus_type); #endif + x86_init.iommu.iommu_init(); + calgary_iommu_init(); intel_iommu_init(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index bc9b230..c46984d 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -19,6 +19,7 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } +int __init iommu_init_noop(void) { return 0; } /* * The platform setup functions are preset with the default functions @@ -63,6 +64,10 @@ struct x86_init_ops x86_init __initdata = { .tsc_pre_init = x86_init_noop, .timer_init = hpet_time_init, }, + + .iommu = { + .iommu_init = iommu_init_noop, + }, }; struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { -- cgit v0.10.2 From d7b9f7be216b04ff9d108f856bc03d96e7b3439c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:13 +0900 Subject: x86: Calgary: Convert detect_calgary() to use iommu_init hook This changes detect_calgary() to set init_calgary() to iommu_init hook if detect_calgary() finds the Calgary IOMMU. We can kill the code to check if we found the IOMMU in init_calgary() since detect_calgary() sets init_calgary() only when it found the IOMMU. Signed-off-by: FUJITA Tomonori Acked-by: Muli Ben-Yehuda Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com LKML-Reference: <1257849980-22640-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h index b03bedb..0918654 100644 --- a/arch/x86/include/asm/calgary.h +++ b/arch/x86/include/asm/calgary.h @@ -62,10 +62,8 @@ struct cal_chipset_ops { extern int use_calgary; #ifdef CONFIG_CALGARY_IOMMU -extern int calgary_iommu_init(void); extern void detect_calgary(void); #else -static inline int calgary_iommu_init(void) { return 1; } static inline void detect_calgary(void) { return; } #endif diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 971a3be..47bd419 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -46,6 +46,7 @@ #include #include #include +#include #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; @@ -1344,6 +1345,8 @@ static void __init get_tce_space_from_tar(void) return; } +int __init calgary_iommu_init(void); + void __init detect_calgary(void) { int bus; @@ -1445,6 +1448,8 @@ void __init detect_calgary(void) /* swiotlb for devices that aren't behind the Calgary. */ if (max_pfn > MAX_DMA32_PFN) swiotlb = 1; + + x86_init.iommu.iommu_init = calgary_iommu_init; } return; @@ -1461,12 +1466,6 @@ int __init calgary_iommu_init(void) { int ret; - if (no_iommu || (swiotlb && !calgary_detected)) - return -ENODEV; - - if (!calgary_detected) - return -ENODEV; - /* ok, we're trying to use Calgary - let's roll */ printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a13478d..0224da8 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -294,8 +294,6 @@ static int __init pci_iommu_init(void) x86_init.iommu.iommu_init(); - calgary_iommu_init(); - intel_iommu_init(); amd_iommu_init(); -- cgit v0.10.2 From de957628ce7c84764ff41331111036b3ae5bad0f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:14 +0900 Subject: x86: GART: Convert gart_iommu_hole_init() to use iommu_init hook This changes gart_iommu_hole_init() to set gart_iommu_init() to iommu_init hook if gart_iommu_hole_init() finds the GART IOMMU. We can kill the code to check if we found the IOMMU in gart_iommu_init() since gart_iommu_hole_init() sets gart_iommu_init() only when it found the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 4fdd5b3..4ac5b0f 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h @@ -35,7 +35,7 @@ extern int gart_iommu_aperture_allowed; extern int gart_iommu_aperture_disabled; extern void early_gart_iommu_check(void); -extern void gart_iommu_init(void); +extern int gart_iommu_init(void); extern void __init gart_parse_options(char *); extern void gart_iommu_hole_init(void); @@ -47,9 +47,6 @@ extern void gart_iommu_hole_init(void); static inline void early_gart_iommu_check(void) { } -static inline void gart_iommu_init(void) -{ -} static inline void gart_parse_options(char *options) { } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 128111d..03933cf 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -28,6 +28,7 @@ #include #include #include +#include int gart_iommu_aperture; int gart_iommu_aperture_disabled __initdata; @@ -400,6 +401,7 @@ void __init gart_iommu_hole_init(void) iommu_detected = 1; gart_iommu_aperture = 1; + x86_init.iommu.iommu_init = gart_iommu_init; aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0224da8..ecde854 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -298,8 +298,6 @@ static int __init pci_iommu_init(void) amd_iommu_init(); - gart_iommu_init(); - no_iommu_init(); return 0; } diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index eb46ab3..0410bd3 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -709,7 +709,7 @@ static void gart_iommu_shutdown(void) } } -void __init gart_iommu_init(void) +int __init gart_iommu_init(void) { struct agp_kern_info info; unsigned long iommu_start; @@ -719,7 +719,7 @@ void __init gart_iommu_init(void) long i; if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) - return; + return 0; #ifndef CONFIG_AGP_AMD64 no_agp = 1; @@ -731,13 +731,6 @@ void __init gart_iommu_init(void) (agp_copy_info(agp_bridge, &info) < 0); #endif - if (swiotlb) - return; - - /* Did we detect a different HW IOMMU? */ - if (iommu_detected && !gart_iommu_aperture) - return; - if (no_iommu || (!force_iommu && max_pfn <= MAX_DMA32_PFN) || !gart_iommu_aperture || @@ -747,7 +740,7 @@ void __init gart_iommu_init(void) "but GART IOMMU not available.\n"); printk(KERN_WARNING "falling back to iommu=soft.\n"); } - return; + return 0; } /* need to map that range */ @@ -840,6 +833,8 @@ void __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; x86_platform.iommu_shutdown = gart_iommu_shutdown; + + return 0; } void __init gart_parse_options(char *p) -- cgit v0.10.2 From ea1b0d3945c7374849235b6ecaea1191ee1d9d50 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:15 +0900 Subject: x86: amd_iommu: Convert amd_iommu_detect() to use iommu_init hook This changes amd_iommu_detect() to set amd_iommu_init to iommu_init hook if amd_iommu_detect() finds the AMD IOMMU. We can kill the code to check if we found the IOMMU in amd_iommu_init() since amd_iommu_detect() sets amd_iommu_init() only when it found the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-5-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 3604669..b8ef2ee 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -23,7 +23,6 @@ #include #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern void amd_iommu_detect(void); @@ -32,7 +31,6 @@ extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_apply_erratum_63(u16 devid); #else -static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } #endif diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 6acd43e..c41aabd 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * definitions for the ACPI scanning code @@ -1176,19 +1177,10 @@ static struct sys_device device_amd_iommu = { * functions. Finally it prints some information about AMD IOMMUs and * the driver state and enables the hardware. */ -int __init amd_iommu_init(void) +static int __init amd_iommu_init(void) { int i, ret = 0; - - if (no_iommu) { - printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); - return 0; - } - - if (!amd_iommu_detected) - return -ENODEV; - /* * First parse ACPI tables to find the largest Bus/Dev/Func * we need to handle. Upon this information the shared data @@ -1344,10 +1336,7 @@ void __init amd_iommu_detect(void) if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { iommu_detected = 1; amd_iommu_detected = 1; -#ifdef CONFIG_GART_IOMMU - gart_iommu_aperture_disabled = 1; - gart_iommu_aperture = 0; -#endif + x86_init.iommu.iommu_init = amd_iommu_init; } } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index ecde854..5ca44a9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -296,8 +296,6 @@ static int __init pci_iommu_init(void) intel_iommu_init(); - amd_iommu_init(); - no_iommu_init(); return 0; } -- cgit v0.10.2 From 9d5ce73a64be2be8112147a3e0b551ad9cd1247b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:16 +0900 Subject: x86: intel-iommu: Convert detect_intel_iommu to use iommu_init hook This changes detect_intel_iommu() to set intel_iommu_init() to iommu_init hook if detect_intel_iommu() finds the IOMMU. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-6-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_DMAR case ] Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 5ca44a9..bed05e2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -294,8 +294,6 @@ static int __init pci_iommu_init(void) x86_init.iommu.iommu_init(); - intel_iommu_init(); - no_iommu_init(); return 0; } diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 22b02c6..bce9cd7 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -617,6 +617,10 @@ void __init detect_intel_iommu(void) !dmar_disabled) iommu_detected = 1; #endif +#ifdef CONFIG_X86 + if (ret) + x86_init.iommu.iommu_init = intel_iommu_init; +#endif } early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4a2b162..5de4c9e 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -208,16 +208,9 @@ struct dmar_atsr_unit { u8 include_all:1; /* include all ports */ }; -/* Intel DMAR initialization functions */ extern int intel_iommu_init(void); -#else -static inline int intel_iommu_init(void) -{ -#ifdef CONFIG_INTR_REMAP - return dmar_dev_scope_init(); -#else - return -ENODEV; -#endif -} -#endif /* !CONFIG_DMAR */ +#else /* !CONFIG_DMAR: */ +static inline int intel_iommu_init(void) { return -ENODEV; } +#endif /* CONFIG_DMAR */ + #endif /* __DMAR_H__ */ -- cgit v0.10.2 From 9f993ac3f708b661207ed7de521f245586217a68 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:17 +0900 Subject: bootmem: Add free_bootmem_late() Add a new function for freeing bootmem after the bootmem allocator has been released and the unreserved pages given to the page allocator. This allows us to reserve bootmem and then release it if we later discover it was not needed. ( This new API will be used by the swiotlb code to recover a significant amount of RAM (64MB). ) Signed-off-by: FUJITA Tomonori Acked-by: Pekka Enberg Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: hannes@cmpxchg.org Cc: tj@kernel.org Cc: akpm@linux-foundation.org Cc: Linus Torvalds LKML-Reference: <1257849980-22640-7-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index dd97fb8..b10ec49 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -53,6 +53,7 @@ extern void free_bootmem_node(pg_data_t *pgdat, unsigned long addr, unsigned long size); extern void free_bootmem(unsigned long addr, unsigned long size); +extern void free_bootmem_late(unsigned long addr, unsigned long size); /* * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, diff --git a/mm/bootmem.c b/mm/bootmem.c index 555d5d2..d1dc23c 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); } +/* + * free_bootmem_late - free bootmem pages directly to page allocator + * @addr: starting address of the range + * @size: size of the range in bytes + * + * This is only useful when the bootmem allocator has already been torn + * down, but we are still initializing the system. Pages are given directly + * to the page allocator, no bootmem metadata is updated because it is gone. + */ +void __init free_bootmem_late(unsigned long addr, unsigned long size) +{ + unsigned long cursor, end; + + kmemleak_free_part(__va(addr), size); + + cursor = PFN_UP(addr); + end = PFN_DOWN(addr + size); + + for (; cursor < end; cursor++) { + __free_pages_bootmem(pfn_to_page(cursor), 0); + totalram_pages++; + } +} + static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) { int aligned; -- cgit v0.10.2 From 5740afdb68abadc473fd5392df733558a58c1254 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:18 +0900 Subject: swiotlb: Add swiotlb_free() function swiotlb_free() function frees all allocated memory for swiotlb. We need to initialize swiotlb before IOMMU initialization (x86 and powerpc needs to allocate memory from bootmem allocator). If IOMMU initialization is successful, we need to free swiotlb resource (don't want to waste 64MB). Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-8-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: build fix for the !CONFIG_SWIOTLB case ] Signed-off-by: Ingo Molnar diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 73b1f1c..59bafa6 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -88,4 +88,10 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); +#ifdef CONFIG_SWIOTLB +extern void __init swiotlb_free(void); +#else +static inline void swiotlb_free(void) { } +#endif + #endif /* __LINUX_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index ac25cd2..eee512b 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -97,6 +97,8 @@ static phys_addr_t *io_tlb_orig_addr; */ static DEFINE_SPINLOCK(io_tlb_lock); +static int late_alloc; + static int __init setup_io_tlb_npages(char *str) { @@ -262,6 +264,8 @@ swiotlb_late_init_with_default_size(size_t default_size) swiotlb_print_info(bytes); + late_alloc = 1; + return 0; cleanup4: @@ -281,6 +285,32 @@ cleanup1: return -ENOMEM; } +void __init swiotlb_free(void) +{ + if (!io_tlb_overflow_buffer) + return; + + if (late_alloc) { + free_pages((unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow)); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + free_pages((unsigned long)io_tlb_start, + get_order(io_tlb_nslabs << IO_TLB_SHIFT)); + } else { + free_bootmem_late(__pa(io_tlb_overflow_buffer), + io_tlb_overflow); + free_bootmem_late(__pa(io_tlb_orig_addr), + io_tlb_nslabs * sizeof(phys_addr_t)); + free_bootmem_late(__pa(io_tlb_list), + io_tlb_nslabs * sizeof(int)); + free_bootmem_late(__pa(io_tlb_start), + io_tlb_nslabs << IO_TLB_SHIFT); + } +} + static int is_swiotlb_buffer(phys_addr_t paddr) { return paddr >= virt_to_phys(io_tlb_start) && -- cgit v0.10.2 From ad32e8cb86e7894aac51c8963eaa9f36bb8a4e14 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:19 +0900 Subject: swiotlb: Defer swiotlb init printing, export swiotlb_print_info() This enables us to avoid printing swiotlb memory info when we initialize swiotlb. After swiotlb initialization, we could find that we don't need swiotlb. This patch removes the code to print swiotlb memory info in swiotlb_init() and exports the function to do that. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com Cc: tony.luck@intel.com Cc: benh@kernel.crashing.org LKML-Reference: <1257849980-22640-9-git-send-email-fujita.tomonori@lab.ntt.co.jp> [ -v2: merge up conflict ] Signed-off-by: Ingo Molnar diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 285aae8..53292ab 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -41,7 +41,7 @@ struct dma_map_ops swiotlb_dma_ops = { void __init swiotlb_dma_init(void) { dma_ops = &swiotlb_dma_ops; - swiotlb_init(); + swiotlb_init(1); } void __init pci_swiotlb_init(void) @@ -51,7 +51,7 @@ void __init pci_swiotlb_init(void) swiotlb = 1; printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); machvec_init("dig"); - swiotlb_init(); + swiotlb_init(1); dma_ops = &swiotlb_dma_ops; #else panic("Unable to find Intel IOMMU"); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 53bcf3d..b152de3 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -345,7 +345,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 04f638d..df2c9e9 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -550,7 +550,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_SWIOTLB if (ppc_swiotlb_enable) - swiotlb_init(); + swiotlb_init(1); #endif paging_init(); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b78..ea20ef7 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -52,8 +52,7 @@ void __init pci_swiotlb_init(void) if (swiotlb_force) swiotlb = 1; if (swiotlb) { - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); - swiotlb_init(); + swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 59bafa6..eb9bdb4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -20,8 +20,7 @@ struct scatterlist; */ #define IO_TLB_SHIFT 11 -extern void -swiotlb_init(void); +extern void swiotlb_init(int verbose); extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, @@ -94,4 +93,5 @@ extern void __init swiotlb_free(void); static inline void swiotlb_free(void) { } #endif +extern void swiotlb_print_info(void); #endif /* __LINUX_SWIOTLB_H */ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index eee512b..0c12d7c 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -123,8 +123,9 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, return phys_to_dma(hwdev, virt_to_phys(address)); } -static void swiotlb_print_info(unsigned long bytes) +void swiotlb_print_info(void) { + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; phys_addr_t pstart, pend; pstart = virt_to_phys(io_tlb_start); @@ -142,7 +143,7 @@ static void swiotlb_print_info(unsigned long bytes) * structures for the software IO TLB used to implement the DMA API. */ void __init -swiotlb_init_with_default_size(size_t default_size) +swiotlb_init_with_default_size(size_t default_size, int verbose) { unsigned long i, bytes; @@ -178,14 +179,14 @@ swiotlb_init_with_default_size(size_t default_size) io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); if (!io_tlb_overflow_buffer) panic("Cannot allocate SWIOTLB overflow buffer!\n"); - - swiotlb_print_info(bytes); + if (verbose) + swiotlb_print_info(); } void __init -swiotlb_init(void) +swiotlb_init(int verbose) { - swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */ + swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ } /* @@ -262,7 +263,7 @@ swiotlb_late_init_with_default_size(size_t default_size) if (!io_tlb_overflow_buffer) goto cleanup4; - swiotlb_print_info(bytes); + swiotlb_print_info(); late_alloc = 1; -- cgit v0.10.2 From 75f1cdf1dda92cae037ec848ae63690d91913eac Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 19:46:20 +0900 Subject: x86: Handle HW IOMMU initialization failure gracefully If HW IOMMU initialization fails (Intel VT-d often does this, typically due to BIOS bugs), we fall back to nommu. It doesn't work for the majority since nowadays we have more than 4GB memory so we must use swiotlb instead of nommu. The problem is that it's too late to initialize swiotlb when HW IOMMU initialization fails. We need to allocate swiotlb memory earlier from bootmem allocator. Chris explained the issue in detail: http://marc.info/?l=linux-kernel&m=125657444317079&w=2 The current x86 IOMMU initialization sequence is too complicated and handling the above issue makes it more hacky. This patch changes x86 IOMMU initialization sequence to handle the above issue cleanly. The new x86 IOMMU initialization sequence are: 1. we initialize the swiotlb (and setting swiotlb to 1) in the case of (max_pfn > MAX_DMA32_PFN && !no_iommu). dma_ops is set to swiotlb_dma_ops or nommu_dma_ops. if swiotlb usage is forced by the boot option, we finish here. 2. we call the detection functions of all the IOMMUs 3. the detection function sets x86_init.iommu.iommu_init to the IOMMU initialization function (so we can avoid calling the initialization functions of all the IOMMUs needlessly). 4. if the IOMMU initialization function doesn't need to swiotlb then sets swiotlb to zero (e.g. the initialization is sucessful). 5. if we find that swiotlb is set to zero, we free swiotlb resource. Signed-off-by: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-10-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 878b307..df42a71 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,7 +2,6 @@ #define _ASM_X86_IOMMU_H static inline void iommu_shutdown_noop(void) {} -extern void no_iommu_init(void); extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521..66237fd 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2110,8 +2110,8 @@ int __init amd_iommu_init_dma_ops(void) prealloc_protection_domains(); iommu_detected = 1; - force_iommu = 1; bad_dma_address = 0; + swiotlb = 0; #ifdef CONFIG_GART_IOMMU gart_iommu_aperture_disabled = 1; gart_iommu_aperture = 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c41aabd..0d4581e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1330,7 +1330,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) void __init amd_iommu_detect(void) { - if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) + if (no_iommu || (iommu_detected && !gart_iommu_aperture)) return; if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 03933cf..e0dfb68 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -458,7 +458,7 @@ out: if (aper_alloc) { /* Got the aperture from the AGP bridge */ - } else if (swiotlb && !valid_agp) { + } else if (!valid_agp) { /* Do nothing */ } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 47bd419..833f491 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1360,7 +1360,7 @@ void __init detect_calgary(void) * if the user specified iommu=off or iommu=soft or we found * another HW IOMMU already, bail out. */ - if (swiotlb || no_iommu || iommu_detected) + if (no_iommu || iommu_detected) return; if (!use_calgary) @@ -1445,10 +1445,6 @@ void __init detect_calgary(void) printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", specified_table_size); - /* swiotlb for devices that aren't behind the Calgary. */ - if (max_pfn > MAX_DMA32_PFN) - swiotlb = 1; - x86_init.iommu.iommu_init = calgary_iommu_init; } return; @@ -1476,11 +1472,7 @@ int __init calgary_iommu_init(void) return ret; } - force_iommu = 1; bad_dma_address = 0x0; - /* dma_ops is set to swiotlb or nommu */ - if (!dma_ops) - dma_ops = &nommu_dma_ops; return 0; } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index bed05e2..a234e63 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -124,24 +124,24 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { + /* swiotlb is forced by the boot option */ + int use_swiotlb = swiotlb; #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif + pci_swiotlb_init(); + if (use_swiotlb) + return; - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ gart_iommu_hole_init(); detect_calgary(); detect_intel_iommu(); + /* needs to be called after gart_iommu_hole_init */ amd_iommu_detect(); - - pci_swiotlb_init(); } void *dma_generic_alloc_coherent(struct device *dev, size_t size, @@ -291,10 +291,15 @@ static int __init pci_iommu_init(void) #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); #endif - x86_init.iommu.iommu_init(); - no_iommu_init(); + if (swiotlb) { + printk(KERN_INFO "PCI-DMA: " + "Using software bounce buffering for IO (SWIOTLB)\n"); + swiotlb_print_info(); + } else + swiotlb_free(); + return 0; } /* Must execute after PCI subsystem */ diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0410bd3..919182e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -833,6 +833,7 @@ int __init gart_iommu_init(void) flush_gart(); dma_ops = &gart_dma_ops; x86_platform.iommu_shutdown = gart_iommu_shutdown; + swiotlb = 0; return 0; } diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4..875e382 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -103,12 +103,3 @@ struct dma_map_ops nommu_dma_ops = { .sync_sg_for_device = nommu_sync_sg_for_device, .is_phys = 1, }; - -void __init no_iommu_init(void) -{ - if (dma_ops) - return; - - force_iommu = 0; /* no HW IOMMU */ - dma_ops = &nommu_dma_ops; -} diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index ea20ef7..17ce422 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -46,13 +46,12 @@ void __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 - if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) + if (!no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif - if (swiotlb_force) - swiotlb = 1; if (swiotlb) { swiotlb_init(0); dma_ops = &swiotlb_dma_ops; - } + } else + dma_ops = &nommu_dma_ops; } diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index bce9cd7..4373996 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -613,8 +613,7 @@ void __init detect_intel_iommu(void) "x2apic and Intr-remapping.\n"); #endif #ifdef CONFIG_DMAR - if (ret && !no_iommu && !iommu_detected && !swiotlb && - !dmar_disabled) + if (ret && !no_iommu && !iommu_detected && !dmar_disabled) iommu_detected = 1; #endif #ifdef CONFIG_X86 diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b1e97e6..43d755a 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -3231,7 +3231,7 @@ int __init intel_iommu_init(void) * Check the need for DMA-remapping initialization now. * Above initialization will also be used by Interrupt-remapping. */ - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || dmar_disabled) return -ENODEV; iommu_init_mempool(); @@ -3252,7 +3252,9 @@ int __init intel_iommu_init(void) "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); init_timer(&unmap_timer); - force_iommu = 1; +#ifdef CONFIG_SWIOTLB + swiotlb = 0; +#endif dma_ops = &intel_dma_ops; init_iommu_sysfs(); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 0c12d7c..e6755a0 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -109,8 +109,10 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) + if (!strcmp(str, "force")) { swiotlb_force = 1; + swiotlb = 1; + } return 1; } __setup("swiotlb=", setup_io_tlb_npages); -- cgit v0.10.2 From 72d03802b8b5c841ab1da82bff0652628cbadf60 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 10 Nov 2009 21:35:17 +0900 Subject: x86, 32-bit: Fix swiotlb boot crash Ingo Molnar reported this boot crash: [ 8.655620] pata_amd 0000:00:06.0: version 0.4.1 [ 8.660286] BUG: unable to handle kernel NULL pointer dereference at 00000034 [ 8.663572] IP: [] dma_supported+0x3b/0xa4 [ 8.663572] *pde = 00000000 Initialize dma_ops properly in the 32-bit case. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a234e63..63eebee 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -129,6 +129,8 @@ void __init pci_iommu_alloc(void) #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); +#else + dma_ops = &nommu_dma_ops; #endif pci_swiotlb_init(); if (use_swiotlb) -- cgit v0.10.2 From 606bc1e18d346fc7d7fb333909cc95b06b1ca5b1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Nov 2009 20:50:53 +0900 Subject: perf bench: Clean up bench/bench.h Clean up initializers in bench.h: - No need to break the line for function prototypes, they are more readable in a single line. (even if checkpatch complains about it - We try to align definitions / structure fields vertically, to make it all a bit more readable. Signed-off-by: Ingo Molnar Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257853855-28934-2-git-send-email-mitake@dcl.info.waseda.ac.jp> diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 42167ea..9fbd8d7 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -1,17 +1,15 @@ #ifndef BENCH_H #define BENCH_H -extern int bench_sched_messaging(int argc, const char **argv, - const char *prefix); -extern int bench_sched_pipe(int argc, const char **argv, - const char *prefix); +extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); +extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); -#define BENCH_FORMAT_DEFAULT_STR "default" -#define BENCH_FORMAT_DEFAULT 0 -#define BENCH_FORMAT_SIMPLE_STR "simple" -#define BENCH_FORMAT_SIMPLE 1 +#define BENCH_FORMAT_DEFAULT_STR "default" +#define BENCH_FORMAT_DEFAULT 0 +#define BENCH_FORMAT_SIMPLE_STR "simple" +#define BENCH_FORMAT_SIMPLE 1 -#define BENCH_FORMAT_UNKNOWN -1 +#define BENCH_FORMAT_UNKNOWN -1 extern int bench_format; -- cgit v0.10.2 From 9fbc04f2493929a69fd9e53b5fb53c127d7950d5 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 10 Nov 2009 20:50:54 +0900 Subject: perf bench: Add new document about perf-bench This patch adds new document about perf-bench. Man page and html will be provided for user. Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257853855-28934-3-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt new file mode 100644 index 0000000..ae525ac --- /dev/null +++ b/tools/perf/Documentation/perf-bench.txt @@ -0,0 +1,120 @@ +perf-bench(1) +============ + +NAME +---- +perf-bench - General framework for benchmark suites + +SYNOPSIS +-------- +[verse] +'perf bench' [] [] + +DESCRIPTION +----------- +This 'perf bench' command is general framework for benchmark suites. + +COMMON OPTIONS +-------------- +-f:: +--format=:: +Specify format style. +Current available format styles are, + +'default':: +Default style. This is mainly for human reading. +--------------------- +% perf bench sched pipe # with no style specify +(executing 1000000 pipe operations between two tasks) + Total time:5.855 sec + 5.855061 usecs/op + 170792 ops/sec +--------------------- + +'simple':: +This simple style is friendly for automated +processing by scripts. +--------------------- +% perf bench --format=simple sched pipe # specified simple +5.988 +--------------------- + +SUBSYSTEM +--------- + +'sched':: + Scheduler and IPC mechanisms. + +SUITES FOR 'sched' +~~~~~~~~~~~~~~~~~~ +*messaging*:: +Suite for evaluating performance of scheduler and IPC mechanisms. +Based on hackbench by Rusty Russell. + +Options of *pipe* +^^^^^^^^^^^^^^^^^ +-p:: +--pipe:: +Use pipe() instead of socketpair() + +-t:: +--thread:: +Be multi thread instead of multi process + +-g:: +--group=:: +Specify number of groups + +-l:: +--loop=:: +Specify number of loops + +Example of *messaging* +^^^^^^^^^^^^^^^^^^^^^^ + +--------------------- +% perf bench sched messaging # run with default +options (20 sender and receiver processes per group) +(10 groups == 400 processes run) + + Total time:0.308 sec + +% perf bench sched messaging -t -g 20 # be multi-thread,with 20 groups +(20 sender and receiver threads per group) +(20 groups == 800 threads run) + + Total time:0.582 sec +--------------------- + +*pipe*:: +Suite for pipe() system call. +Based on pipe-test-1m.c by Ingo Molnar. + +Options of *pipe* +^^^^^^^^^^^^^^^^^ +-l:: +--loop=:: +Specify number of loops. + +Example of *pipe* +^^^^^^^^^^^^^^^^^ + +--------------------- +% perf bench sched pipe +(executing 1000000 pipe operations between two tasks) + + Total time:8.091 sec + 8.091833 usecs/op + 123581 ops/sec + +% perf bench sched pipe -l 1000 # loop 1000 +(executing 1000 pipe operations between two tasks) + + Total time:0.016 sec + 16.948000 usecs/op + 59004 ops/sec +--------------------- + +SEE ALSO +-------- +linkperf:perf[1] -- cgit v0.10.2 From 8d8d61aadb9d8cce07f7dcdb77a4c20a25d36d07 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Tue, 10 Nov 2009 20:50:55 +0900 Subject: perf bench: Modify command-list.txt for the entry of perf-bench This patch modifies command-list.txt for the entry of perf-bench. So perf will show 'bench' in command list. Example: % perf usage: perf [--version] [--help] COMMAND [ARGS] The most commonly used perf commands are: annotate Read perf.data (created by perf record) and display annotated code bench General framework for benchmark suites ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ list List all symbolic event types probe Define new dynamic tracepoints record Run a command and record its profile into perf.data report Read perf.data (created by perf record) and display the profile sched Tool to trace/measure scheduler properties (latencies) stat Run a command and gather performance counter statistics timechart Tool to visualize total system behavior during a workload top System profiling tool. trace Read perf.data (created by perf record) and display trace output See 'perf help COMMAND' for more information on a specific command. Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257853855-28934-4-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 00326e2..981c40b 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -3,6 +3,7 @@ # command name category [deprecated] [common] # perf-annotate mainporcelain common +perf-bench mainporcelain common perf-list mainporcelain common perf-sched mainporcelain common perf-record mainporcelain common -- cgit v0.10.2 From b4941a9a606f0131559cc040b64e8437ac7b32c5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 10 Nov 2009 14:37:58 +0100 Subject: x86: Add iommu_init to x86_init_ops, fix build Most of the time x86_init.h is included in pci-dma.c - but not always, leading to this rare build failure: arch/x86/kernel/pci-dma.c:296: error: 'x86_init' undeclared (first use in this function) So include asm/x86_init.h explicitly. Cc: FUJITA Tomonori Cc: chrisw@sous-sol.org Cc: dwmw2@infradead.org Cc: joerg.roedel@amd.com Cc: muli@il.ibm.com LKML-Reference: <1257849980-22640-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 63eebee..f79870e 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -11,6 +11,7 @@ #include #include #include +#include static int forbid_dac __read_mostly; -- cgit v0.10.2 From 79e295d4bd0f524257299e7c4e42f643f21abcc2 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Wed, 11 Nov 2009 00:04:00 +0900 Subject: perf bench: Improve builtin-bench.c for more friendly output This patch makes output of perf bench more friendly. Current style of putput, keeping user wait and printing everything suddenly when we finish, may confuse users. So I improved it: | % perf bench sched messaging | # Running sched/messaging benchmark... <- printed right after invocation | # 20 sender and receiver processes per group | # 10 groups == 400 processes run | | Total time: 1.476 [sec] Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257865442-20252-2-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index c7505ea..90c39ba 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -156,6 +156,10 @@ int cmd_bench(int argc, const char **argv, const char *prefix __used) if (strcmp(subsystems[i].suites[j].name, argv[1])) continue; + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Running %s/%s benchmark...\n", + subsystems[i].name, + subsystems[i].suites[j].name); status = subsystems[i].suites[j].fn(argc - 1, argv + 1, prefix); goto end; -- cgit v0.10.2 From ff676b193a401b23c84a79a7ec06559f3eaae917 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Wed, 11 Nov 2009 00:04:01 +0900 Subject: perf bench: Improve sched-pipe.c with more comfortable output This patch improves sched-pipe.c with more comfortable output. Change points are comment style description and formatting numerical values and its units. Example: | % ./perf bench sched pipe | # Running sched/pipe benchmark... | # Extecuted 1000000 pipe operations between two tasks | | Total time:5.822 [sec] | | 5.822553 usecs/op | 171745 ops/sec Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257865442-20252-3-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index a9ac186..238185f 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -92,17 +92,18 @@ int bench_sched_pipe(int argc, const char **argv, switch (bench_format) { case BENCH_FORMAT_DEFAULT: - printf("(executing %d pipe operations between two tasks)\n\n", + printf("# Extecuted %d pipe operations between two tasks\n\n", loops); result_usec = diff.tv_sec * 1000000; result_usec += diff.tv_usec; - printf("\tTotal time:%lu.%03lu sec\n", - diff.tv_sec, diff.tv_usec / 1000); - printf("\t\t%lf usecs/op\n", + printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", + diff.tv_sec, diff.tv_usec/1000); + + printf(" %14lf usecs/op\n", (double)result_usec / (double)loops); - printf("\t\t%d ops/sec\n", + printf(" %14d ops/sec\n", (int)((double)loops / ((double)result_usec / (double)1000000))); break; -- cgit v0.10.2 From c5659b74f052150791750234f92dcfb29d27efa5 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Wed, 11 Nov 2009 00:04:02 +0900 Subject: perf bench: Improve sched-message.c with more comfortable output This patch improves sched-message.c with more comfortable output. Change points are comment style description and formatting numerical values and its units. Example: | % perf bench sched messaging | # Running sched/messaging benchmark... | # 20 sender and receiver processes per group | # 10 groups == 400 processes run | | Total time: 1.490 [sec] Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1257865442-20252-4-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Paul Mackerras diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index 2cc5edc..605a2a9 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -314,12 +314,12 @@ int bench_sched_messaging(int argc, const char **argv, switch (bench_format) { case BENCH_FORMAT_DEFAULT: - printf("(%d sender and receiver %s per group)\n", + printf("# %d sender and receiver %s per group\n", num_fds, thread_mode ? "threads" : "processes"); - printf("(%d groups == %d %s run)\n\n", + printf("# %d groups == %d %s run\n\n", num_groups, num_groups * 2 * num_fds, thread_mode ? "threads" : "processes"); - printf("\tTotal time:%lu.%03lu sec\n", + printf(" %14s: %lu.%03lu [sec]\n", "Total time", diff.tv_sec, diff.tv_usec/1000); break; case BENCH_FORMAT_SIMPLE: -- cgit v0.10.2 From ffd44db5f02af32bcc25a8eb5981bf02a141cdab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Nov 2009 20:12:01 +0100 Subject: sched: Make sure task has correct sched_class after policy change From the code in rt_mutex_setprio(), it is evident that the intention is that task's with a RT 'prio' value as a consequence of receiving a PI boost also have their 'sched_class' field set to '&rt_sched_class'. However, Peter noticed that the code in __setscheduler() could result in this intention being frustrated. Fix it. Reported-by: Peter Williams Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <1257880321.4108.457.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index ad37776..43e61fa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6159,22 +6159,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) BUG_ON(p->se.on_rq); p->policy = policy; - switch (p->policy) { - case SCHED_NORMAL: - case SCHED_BATCH: - case SCHED_IDLE: - p->sched_class = &fair_sched_class; - break; - case SCHED_FIFO: - case SCHED_RR: - p->sched_class = &rt_sched_class; - break; - } - p->rt_priority = prio; p->normal_prio = normal_prio(p); /* we are holding p->pi_lock already */ p->prio = rt_mutex_getprio(p); + if (rt_prio(p->prio)) + p->sched_class = &rt_sched_class; + else + p->sched_class = &fair_sched_class; set_load_weight(p); } -- cgit v0.10.2 From 200a9ae2801bc725f2c41ab13f6e0fb1610d2fb6 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 10 Nov 2009 13:58:35 -0600 Subject: x86: Remove asm/apicnum.h arch/x86/include/asm/apicnum.h is not referenced anywhere anymore. Its definitions appear in apicdef.h. Remove it. Signed-off-by: Dimitri Sivanich Acked-by: Cyrill Gorcunov Acked-by: Mike Travis LKML-Reference: <20091110195835.GA4393@sgi.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h deleted file mode 100644 index 82f613c..0000000 --- a/arch/x86/include/asm/apicnum.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_X86_APICNUM_H -#define _ASM_X86_APICNUM_H - -/* define MAX_IO_APICS */ -#ifdef CONFIG_X86_32 -# define MAX_IO_APICS 64 -#else -# define MAX_IO_APICS 128 -# define MAX_LOCAL_APIC 32768 -#endif - -#endif /* _ASM_X86_APICNUM_H */ -- cgit v0.10.2 From dbe01350fa8ce0c11948ab7d6be71a4d901be151 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Nov 2009 13:37:19 -0800 Subject: rcu: Remove inline from forward-referenced functions Some variants of gcc are reputed to dislike forward references to functions declared "inline". Remove the "inline" keyword from such functions. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12578890422402-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.h b/kernel/rcutree.h index c1891c3..ddb79ec 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -301,7 +301,7 @@ DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); #else /* #ifdef RCU_TREE_NONCORE */ /* Forward declarations for rcutree_plugin.h */ -static inline void rcu_bootup_announce(void); +static void rcu_bootup_announce(void); long rcu_batches_completed(void); static void rcu_preempt_note_context_switch(int cpu); static int rcu_preempted_readers(struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ef2a58c..c03edf7 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); /* * Tell them what RCU they are running. */ -static inline void rcu_bootup_announce(void) +static void rcu_bootup_announce(void) { printk(KERN_INFO "Experimental preemptable hierarchical RCU implementation.\n"); @@ -481,7 +481,7 @@ void exit_rcu(void) /* * Tell them what RCU they are running. */ -static inline void rcu_bootup_announce(void) +static void rcu_bootup_announce(void) { printk(KERN_INFO "Hierarchical RCU implementation.\n"); } -- cgit v0.10.2 From 956539b75921f561c0956c22d37320780e8b4ba1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Nov 2009 13:37:20 -0800 Subject: rcu: Enable synchronize_sched_expedited() fastpath This patch adds a counter increment to enable tasks to actually take the synchronize_sched_expedited() function's fastpath. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1257889042435-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 76c0e96..e69fee4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -10865,6 +10865,7 @@ void synchronize_sched_expedited(void) spin_unlock_irqrestore(&rq->lock, flags); } rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; + synchronize_sched_expedited_count++; mutex_unlock(&rcu_sched_expedited_mutex); put_online_cpus(); if (need_full_sync) -- cgit v0.10.2 From 4bcfe055030d9e953945def3864f7e6997b27782 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Nov 2009 13:37:21 -0800 Subject: rcu: Rename dynticks_completed to completed_fqs This field is used whether or not CONFIG_NO_HZ is set, so the old name of ->dynticks_completed is quite misleading. Change to ->completed_fqs, given that it the value that force_quiescent_state() is trying to drive the ->completed field away from. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12578890423298-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ec007dd..26fc780 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -187,7 +187,7 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) */ static void dyntick_record_completed(struct rcu_state *rsp, long comp) { - rsp->dynticks_completed = comp; + rsp->completed_fqs = comp; } #ifdef CONFIG_SMP @@ -197,7 +197,7 @@ static void dyntick_record_completed(struct rcu_state *rsp, long comp) */ static long dyntick_recall_completed(struct rcu_state *rsp) { - return rsp->dynticks_completed; + return rsp->completed_fqs; } /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index ddb79ec..17a28a0 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -264,6 +264,8 @@ struct rcu_state { long orphan_qlen; /* Number of orphaned cbs. */ spinlock_t fqslock; /* Only one task forcing */ /* quiescent states. */ + long completed_fqs; /* Value of completed @ snap. */ + /* Protected by fqslock. */ unsigned long jiffies_force_qs; /* Time at which to invoke */ /* force_quiescent_state(). */ unsigned long n_force_qs; /* Number of calls to */ @@ -278,8 +280,6 @@ struct rcu_state { unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - long dynticks_completed; /* Value of completed @ snap. */ - /* Protected by fqslock. */ }; #ifdef RCU_TREE_NONCORE -- cgit v0.10.2 From c64ac3ce06558e534aec62b1fadeb0a3f111dac1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 10 Nov 2009 13:37:22 -0800 Subject: rcu: Simplify association of quiescent states with grace periods The rdp->passed_quiesc_completed fields are used to properly associate the recorded quiescent state with a grace period. It is OK to wrongly associate a given quiescent state with a preceding grace period, but it is fatal to associate a given quiescent state with a grace period that begins after the quiescent state occurred. Grace periods are numbered, and the following fields track them: o ->gpnum is the number of the grace period currently in progress, or the number of the last grace period to complete if no grace period is currently in progress. o ->completed is the number of the last grace period to have completed. These two fields are equal if there is no grace period in progress, otherwise ->gpnum is one greater than ->completed. But the rdp->passed_quiesc_completed field compared against ->completed, and if equal, the quiescent state is presumed to count against the current grace period. The earlier code copied rdp->completed to rdp->passed_quiesc_completed, which has been made to work, but is error-prone. In contrast, copying one less than rdp->gpnum is guaranteed safe, because rdp->gpnum is not incremented until after the start of the corresponding grace period. At the end of the grace period, when ->completed has incremented, then any quiescent periods recorded previously will be discarded. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12578890421011-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 26fc780..d802419 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -100,7 +100,7 @@ void rcu_sched_qs(int cpu) struct rcu_data *rdp; rdp = &per_cpu(rcu_sched_data, cpu); - rdp->passed_quiesc_completed = rdp->completed; + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; rcu_preempt_note_context_switch(cpu); @@ -111,7 +111,7 @@ void rcu_bh_qs(int cpu) struct rcu_data *rdp; rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc_completed = rdp->completed; + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; } diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c03edf7..52075da 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -67,7 +67,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); static void rcu_preempt_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - rdp->passed_quiesc_completed = rdp->completed; + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; } -- cgit v0.10.2 From de8967214d8ce536161a1ad6538ad1cb82e7428d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Nov 2009 04:51:02 +0100 Subject: perf tools: Synthetize the targeted process Don't forget to also synthetize the targeted process from perf record or we'll miss its dso in the events and then we won't be able to deal with its build-id. We are missing it because it is created after the existing synthetized tasks but before the counters are enabled and can send its mapping event. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Hitoshi Mitake LKML-Reference: <1257911467-28276-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index ab33381..9f98b86 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -497,13 +497,22 @@ static int __cmd_record(int argc, const char **argv) if (target_pid == -1 && argc) { pid = fork(); if (pid < 0) - perror("failed to fork"); + die("failed to fork"); if (!pid) { if (execvp(argv[0], (char **)argv)) { perror(argv[0]); exit(-1); } + } else { + /* + * Wait a bit for the execv'ed child to appear + * and be updated in /proc + * FIXME: Do you know a less heuristical solution? + */ + usleep(1000); + event__synthesize_thread(pid, + process_synthesized_event); } child_pid = pid; -- cgit v0.10.2 From 8671dab9d5b2f0b444b8d09792384dccbfd43d14 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Nov 2009 04:51:03 +0100 Subject: perf tools: Move the build-id storage operations to headers So that it makes easier to control it. Especially because we plan to give it a feature section. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Hitoshi Mitake LKML-Reference: <1257911467-28276-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9f98b86..c35e61b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -378,39 +378,11 @@ static void open_counters(int cpu, pid_t pid) nr_cpu++; } -static bool write_buildid_table(void) -{ - struct dso *pos; - bool have_buildid = false; - - list_for_each_entry(pos, &dsos, node) { - struct build_id_event b; - size_t len; - - if (filename__read_build_id(pos->long_name, - &b.build_id, - sizeof(b.build_id)) < 0) - continue; - have_buildid = true; - memset(&b.header, 0, sizeof(b.header)); - len = strlen(pos->long_name) + 1; - len = ALIGN(len, 64); - b.header.size = sizeof(b) + len; - write_output(&b, sizeof(b)); - write_output(pos->long_name, len); - } - - return have_buildid; -} - static void atexit_header(void) { header->data_size += bytes_written; - if (write_buildid_table()) - perf_header__set_feat(header, HEADER_BUILD_ID); - - perf_header__write(header, output); + perf_header__write(header, output, true); } static int __cmd_record(int argc, const char **argv) @@ -487,7 +459,7 @@ static int __cmd_record(int argc, const char **argv) } if (file_new) - perf_header__write(header, output); + perf_header__write(header, output, false); if (!system_wide) event__synthesize_thread(pid, process_synthesized_event); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 050f543..a4d0bbe 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2,11 +2,13 @@ #include #include #include +#include #include "util.h" #include "header.h" #include "../perf.h" #include "trace-event.h" +#include "symbol.h" /* * Create new perf.data header attribute: @@ -172,7 +174,33 @@ static void do_write(int fd, void *buf, size_t size) } } -static void perf_header__adds_write(struct perf_header *self, int fd) +static bool write_buildid_table(int fd) +{ + struct dso *pos; + bool have_buildid = false; + + list_for_each_entry(pos, &dsos, node) { + struct build_id_event b; + size_t len; + + if (filename__read_build_id(pos->long_name, + &b.build_id, + sizeof(b.build_id)) < 0) + continue; + have_buildid = true; + memset(&b.header, 0, sizeof(b.header)); + len = strlen(pos->long_name) + 1; + len = ALIGN(len, 64); + b.header.size = sizeof(b) + len; + do_write(fd, &b, sizeof(b)); + do_write(fd, pos->long_name, len); + } + + return have_buildid; +} + +static void +perf_header__adds_write(struct perf_header *self, int fd, bool at_exit) { struct perf_file_section trace_sec; u64 cur_offset = lseek(fd, 0, SEEK_CUR); @@ -196,9 +224,16 @@ static void perf_header__adds_write(struct perf_header *self, int fd) */ cur_offset = lseek(fd, trace_sec.offset + trace_sec.size, SEEK_SET); } + + if (at_exit) { + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + if (write_buildid_table(fd)) + perf_header__set_feat(self, HEADER_BUILD_ID); + lseek(fd, cur_offset, SEEK_SET); + } }; -void perf_header__write(struct perf_header *self, int fd) +void perf_header__write(struct perf_header *self, int fd, bool at_exit) { struct perf_file_header f_header; struct perf_file_attr f_attr; @@ -236,7 +271,7 @@ void perf_header__write(struct perf_header *self, int fd) if (events) do_write(fd, events, self->event_size); - perf_header__adds_write(self, fd); + perf_header__adds_write(self, fd, at_exit); self->data_offset = lseek(fd, 0, SEEK_CUR); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2f233c5..77186c9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -33,7 +33,7 @@ struct perf_header { }; struct perf_header *perf_header__read(int fd); -void perf_header__write(struct perf_header *self, int fd); +void perf_header__write(struct perf_header *self, int fd, bool at_exit); void perf_header__add_attr(struct perf_header *self, struct perf_header_attr *attr); -- cgit v0.10.2 From 57f395a7eabb913d3605d7392be5bdb0837c9f3d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Nov 2009 04:51:04 +0100 Subject: perf tools: Split up build id saving into fetch and write We are saving the build id once we stop the profiling. And only after doing that we know if we need to set that feature in the header through the feature bitmap. But if we want a proper feature support in the headers, using a rule of offset/size pairs in sections, we need to know in advance how many features we need to set in the headers, so that we can reserve rooms for their section headers. The current state doesn't allow that, as it forces us to first save the build-ids to the file right after the datas instead of planning any structured layout. That's why this splits up the build-ids processing in two parts: one that fetches the build-ids from the Dso objects, and one that saves them into the file. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Hitoshi Mitake LKML-Reference: <1257911467-28276-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 34c6fcb..1f771ce 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -69,6 +69,13 @@ struct build_id_event { char filename[]; }; +struct build_id_list { + struct build_id_event event; + struct list_head list; + const char *dso_name; + int len; +}; + typedef union event_union { struct perf_event_header header; struct ip_event ip; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a4d0bbe..2f702c2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -174,29 +174,18 @@ static void do_write(int fd, void *buf, size_t size) } } -static bool write_buildid_table(int fd) +static void write_buildid_table(int fd, struct list_head *id_head) { - struct dso *pos; - bool have_buildid = false; - - list_for_each_entry(pos, &dsos, node) { - struct build_id_event b; - size_t len; - - if (filename__read_build_id(pos->long_name, - &b.build_id, - sizeof(b.build_id)) < 0) - continue; - have_buildid = true; - memset(&b.header, 0, sizeof(b.header)); - len = strlen(pos->long_name) + 1; - len = ALIGN(len, 64); - b.header.size = sizeof(b) + len; - do_write(fd, &b, sizeof(b)); - do_write(fd, pos->long_name, len); - } + struct build_id_list *iter, *next; + + list_for_each_entry_safe(iter, next, id_head, list) { + struct build_id_event *b = &iter->event; - return have_buildid; + do_write(fd, b, sizeof(*b)); + do_write(fd, (void *)iter->dso_name, iter->len); + list_del(&iter->list); + free(iter); + } } static void @@ -226,10 +215,14 @@ perf_header__adds_write(struct perf_header *self, int fd, bool at_exit) } if (at_exit) { - lseek(fd, self->data_offset + self->data_size, SEEK_SET); - if (write_buildid_table(fd)) + LIST_HEAD(id_list); + + if (fetch_build_id_table(&id_list)) { + lseek(fd, self->data_offset + self->data_size, SEEK_SET); perf_header__set_feat(self, HEADER_BUILD_ID); - lseek(fd, cur_offset, SEEK_SET); + write_buildid_table(fd, &id_list); + lseek(fd, cur_offset, SEEK_SET); + } } }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a2e95ce..9c286db 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -851,6 +851,40 @@ out_close: return err; } +bool fetch_build_id_table(struct list_head *head) +{ + bool have_buildid = false; + struct dso *pos; + + list_for_each_entry(pos, &dsos, node) { + struct build_id_list *new; + struct build_id_event b; + size_t len; + + if (filename__read_build_id(pos->long_name, + &b.build_id, + sizeof(b.build_id)) < 0) + continue; + have_buildid = true; + memset(&b.header, 0, sizeof(b.header)); + len = strlen(pos->long_name) + 1; + len = ALIGN(len, 64); + b.header.size = sizeof(b) + len; + + new = malloc(sizeof(*new)); + if (!new) + die("No memory\n"); + + memcpy(&new->event, &b, sizeof(b)); + new->dso_name = pos->long_name; + new->len = len; + + list_add_tail(&new->list, head); + } + + return have_buildid; +} + int filename__read_build_id(const char *filename, void *bf, size_t size) { int fd, err = -1; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f8c1899..0a34a54 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -86,6 +86,7 @@ char dso__symtab_origin(const struct dso *self); void dso__set_build_id(struct dso *self, void *build_id); int filename__read_build_id(const char *filename, void *bf, size_t size); +bool fetch_build_id_table(struct list_head *head); int build_id__sprintf(u8 *self, int len, char *bf); int load_kernel(symbol_filter_t filter); -- cgit v0.10.2 From 4778d2e4f410c6eea32f594cb2be9590bcb28b84 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Nov 2009 04:51:05 +0100 Subject: perf tools: Read the build-ids from the header layer Keep the build-ids reading implementation in the data mapping but move its call to the headers so that we have a better control on it (offset seeking, size passing, etc..). Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Hitoshi Mitake LKML-Reference: <1257911467-28276-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index 00a9c11..66e58aa 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -70,8 +70,8 @@ process_event(event_t *event, unsigned long offset, unsigned long head) } } -static int perf_header__read_build_ids(const struct perf_header *self, - int input, off_t file_size) +int perf_header__read_build_ids(const struct perf_header *self, + int input, off_t file_size) { off_t offset = self->data_offset + self->data_size; struct build_id_event bev; @@ -163,10 +163,6 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, if (curr_handler->sample_type_check(sample_type) < 0) exit(-1); - if (perf_header__has_feat(header, HEADER_BUILD_ID) && - perf_header__read_build_ids(header, input, input_stat.st_size)) - pr_debug("failed to read buildids, continuing...\n"); - if (load_kernel(NULL) < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h index 716d105..c412281 100644 --- a/tools/perf/util/data_map.h +++ b/tools/perf/util/data_map.h @@ -27,5 +27,7 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, int full_paths, int *cwdlen, char **cwd); +int perf_header__read_build_ids(const struct perf_header *self, + int input, off_t file_size); #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2f702c2..915b56e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -9,6 +9,8 @@ #include "../perf.h" #include "trace-event.h" #include "symbol.h" +#include "data_map.h" +#include "debug.h" /* * Create new perf.data header attribute: @@ -322,6 +324,14 @@ static void perf_header__adds_read(struct perf_header *self, int fd) trace_report(fd); lseek(fd, trace_sec.offset + trace_sec.size, SEEK_SET); } + + if (perf_header__has_feat(self, HEADER_BUILD_ID)) { + struct stat input_stat; + + fstat(fd, &input_stat); + if (perf_header__read_build_ids(self, fd, input_stat.st_size)) + pr_debug("failed to read buildids, continuing...\n"); + } }; struct perf_header *perf_header__read(int fd) @@ -382,14 +392,14 @@ struct perf_header *perf_header__read(int fd) memcpy(&self->adds_features, &f_header.adds_features, sizeof(f_header.adds_features)); - perf_header__adds_read(self, fd); - self->event_offset = f_header.event_types.offset; self->event_size = f_header.event_types.size; self->data_offset = f_header.data.offset; self->data_size = f_header.data.size; + perf_header__adds_read(self, fd); + lseek(fd, self->data_offset, SEEK_SET); self->frozen = 1; -- cgit v0.10.2 From 3e13ab2d83b6867a20663c73c184f29c2fde1558 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Nov 2009 04:51:06 +0100 Subject: perf tools: Use perf_header__set/has_feat whenever possible And drop the alternate checks/sets using set_bit or other kind of helpers. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Hitoshi Mitake LKML-Reference: <1257911467-28276-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c35e61b..326e8a7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -431,11 +431,11 @@ static int __cmd_record(int argc, const char **argv) header = perf_header__new(); if (raw_samples) { - perf_header__feat_trace_info(header); + perf_header__set_feat(header, HEADER_TRACE_INFO); } else { for (i = 0; i < nr_counters; i++) { if (attrs[i].sample_type & PERF_SAMPLE_RAW) { - perf_header__feat_trace_info(header); + perf_header__set_feat(header, HEADER_TRACE_INFO); break; } } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 915b56e..9709d38 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -148,11 +148,6 @@ struct perf_file_header { DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; -void perf_header__feat_trace_info(struct perf_header *header) -{ - set_bit(HEADER_TRACE_INFO, header->adds_features); -} - void perf_header__set_feat(struct perf_header *self, int feat) { set_bit(feat, self->adds_features); @@ -195,9 +190,8 @@ perf_header__adds_write(struct perf_header *self, int fd, bool at_exit) { struct perf_file_section trace_sec; u64 cur_offset = lseek(fd, 0, SEEK_CUR); - unsigned long *feat_mask = self->adds_features; - if (test_bit(HEADER_TRACE_INFO, feat_mask)) { + if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { /* Write trace info */ trace_sec.offset = lseek(fd, sizeof(trace_sec), SEEK_CUR); read_tracing_data(fd, attrs, nr_counters); @@ -314,9 +308,7 @@ static void do_read(int fd, void *buf, size_t size) static void perf_header__adds_read(struct perf_header *self, int fd) { - const unsigned long *feat_mask = self->adds_features; - - if (test_bit(HEADER_TRACE_INFO, feat_mask)) { + if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { struct perf_file_section trace_sec; do_read(fd, &trace_sec, sizeof(trace_sec)); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 77186c9..a22d70b 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -49,7 +49,6 @@ void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); struct perf_event_attr * perf_header__find_attr(u64 id, struct perf_header *header); -void perf_header__feat_trace_info(struct perf_header *header); void perf_header__set_feat(struct perf_header *self, int feat); bool perf_header__has_feat(const struct perf_header *self, int feat); -- cgit v0.10.2 From 9e827dd00a94136b944a538bede67c944d0b740a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 11 Nov 2009 04:51:07 +0100 Subject: perf tools: Bring linear set of section headers for features Build a set of section headers for features right after the datas. Each implemented feature will have one of such section header that provides the offset and the size of the data manipulated by the feature. The trace informations have moved after the data and are recorded on exit time. The new layout is as follows: ----------------------- ___ [ magic ] | [ header size ] | [ attr size ] | [ attr content offset ] | [ attr content size ] | [ data offset ] File Headers [ data size ] | [ event_types offset ] | [ event_types size ] | [ feature bitmap ] v [ attr section ] [ events section ] ___ [ X ] | [ X ] | [ X ] Datas [ X ] | [ X ] v ___ [ Feature 1 offset ] | [ Feature 1 size ] Features headers [ Feature 2 offset ] | [ Feature 2 size ] v [ Feature 1 content ] [ Feature 2 content ] ----------------------- We have as many feature's section headers as we have features in use for the current file. Say Feat 1 and Feat 3 are used by the file, but not Feat 2. Then the feature headers will be like follows: [ Feature 1 offset ] | [ Feature 1 size ] Features headers [ Feature 3 offset ] | [ Feature 3 size ] v There is no hole to cover Feature 2 that is not in use here. We only need to cover the needed headers in order, from the lowest feature bit to the highest. Currently we have two features: HEADER_TRACE_INFO and HEADER_BUILD_ID. Both have their contents that follow the feature headers. Putting the contents right after the feature headers is not mandatory though. While we keep the feature headers right after the data and in order, their offsets can point everywhere. We have just put the two above feature contents in the end of the file for convenience. The purpose of this layout change is to have a file format that scales while keeping it simple: having such linear feature headers is less error prone wrt forward/backward compatibility as the content of a feature can be put anywhere, its location can even change by the time, it's fine because its headers will tell where it is. And we know how to find these headers, following the above rules. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Hitoshi Mitake LKML-Reference: <1257911467-28276-6-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index 66e58aa..aacb814 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -70,18 +70,15 @@ process_event(event_t *event, unsigned long offset, unsigned long head) } } -int perf_header__read_build_ids(const struct perf_header *self, - int input, off_t file_size) +int perf_header__read_build_ids(int input, off_t size) { - off_t offset = self->data_offset + self->data_size; struct build_id_event bev; char filename[PATH_MAX]; + off_t offset = lseek(input, 0, SEEK_CUR); + off_t limit = offset + size; int err = -1; - if (lseek(input, offset, SEEK_SET) < 0) - return -1; - - while (offset < file_size) { + while (offset < limit) { struct dso *dso; ssize_t len; diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h index c412281..20b4037 100644 --- a/tools/perf/util/data_map.h +++ b/tools/perf/util/data_map.h @@ -27,7 +27,6 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, int full_paths, int *cwdlen, char **cwd); -int perf_header__read_build_ids(const struct perf_header *self, - int input, off_t file_size); +int perf_header__read_build_ids(int input, off_t file_size); #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9709d38..ebed4f4 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -186,41 +186,58 @@ static void write_buildid_table(int fd, struct list_head *id_head) } static void -perf_header__adds_write(struct perf_header *self, int fd, bool at_exit) +perf_header__adds_write(struct perf_header *self, int fd) { - struct perf_file_section trace_sec; - u64 cur_offset = lseek(fd, 0, SEEK_CUR); + LIST_HEAD(id_list); + int nr_sections; + struct perf_file_section *feat_sec; + int sec_size; + u64 sec_start; + int idx = 0; + + if (fetch_build_id_table(&id_list)) + perf_header__set_feat(self, HEADER_BUILD_ID); + + nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); + if (!nr_sections) + return; + + feat_sec = calloc(sizeof(*feat_sec), nr_sections); + if (!feat_sec) + die("No memory"); + + sec_size = sizeof(*feat_sec) * nr_sections; + + sec_start = self->data_offset + self->data_size; + lseek(fd, sec_start + sec_size, SEEK_SET); if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { + struct perf_file_section *trace_sec; + + trace_sec = &feat_sec[idx++]; + /* Write trace info */ - trace_sec.offset = lseek(fd, sizeof(trace_sec), SEEK_CUR); + trace_sec->offset = lseek(fd, 0, SEEK_CUR); read_tracing_data(fd, attrs, nr_counters); - trace_sec.size = lseek(fd, 0, SEEK_CUR) - trace_sec.offset; - - /* Write trace info headers */ - lseek(fd, cur_offset, SEEK_SET); - do_write(fd, &trace_sec, sizeof(trace_sec)); - - /* - * Update cur_offset. So that other (future) - * features can set their own infos in this place. But if we are - * the only feature, at least that seeks to the place the data - * should begin. - */ - cur_offset = lseek(fd, trace_sec.offset + trace_sec.size, SEEK_SET); + trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset; } - if (at_exit) { - LIST_HEAD(id_list); - if (fetch_build_id_table(&id_list)) { - lseek(fd, self->data_offset + self->data_size, SEEK_SET); - perf_header__set_feat(self, HEADER_BUILD_ID); - write_buildid_table(fd, &id_list); - lseek(fd, cur_offset, SEEK_SET); - } + if (perf_header__has_feat(self, HEADER_BUILD_ID)) { + struct perf_file_section *buildid_sec; + + buildid_sec = &feat_sec[idx++]; + + /* Write build-ids */ + buildid_sec->offset = lseek(fd, 0, SEEK_CUR); + write_buildid_table(fd, &id_list); + buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; } -}; + + lseek(fd, sec_start, SEEK_SET); + do_write(fd, feat_sec, sec_size); + free(feat_sec); +} void perf_header__write(struct perf_header *self, int fd, bool at_exit) { @@ -260,10 +277,11 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) if (events) do_write(fd, events, self->event_size); - perf_header__adds_write(self, fd, at_exit); - self->data_offset = lseek(fd, 0, SEEK_CUR); + if (at_exit) + perf_header__adds_write(self, fd); + f_header = (struct perf_file_header){ .magic = PERF_MAGIC, .size = sizeof(f_header), @@ -308,22 +326,44 @@ static void do_read(int fd, void *buf, size_t size) static void perf_header__adds_read(struct perf_header *self, int fd) { + struct perf_file_section *feat_sec; + int nr_sections; + int sec_size; + int idx = 0; + + + nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); + if (!nr_sections) + return; + + feat_sec = calloc(sizeof(*feat_sec), nr_sections); + if (!feat_sec) + die("No memory"); + + sec_size = sizeof(*feat_sec) * nr_sections; + + lseek(fd, self->data_offset + self->data_size, SEEK_SET); + + do_read(fd, feat_sec, sec_size); + if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { - struct perf_file_section trace_sec; + struct perf_file_section *trace_sec; - do_read(fd, &trace_sec, sizeof(trace_sec)); - lseek(fd, trace_sec.offset, SEEK_SET); + trace_sec = &feat_sec[idx++]; + lseek(fd, trace_sec->offset, SEEK_SET); trace_report(fd); - lseek(fd, trace_sec.offset + trace_sec.size, SEEK_SET); } if (perf_header__has_feat(self, HEADER_BUILD_ID)) { - struct stat input_stat; + struct perf_file_section *buildid_sec; - fstat(fd, &input_stat); - if (perf_header__read_build_ids(self, fd, input_stat.st_size)) + buildid_sec = &feat_sec[idx++]; + lseek(fd, buildid_sec->offset, SEEK_SET); + if (perf_header__read_build_ids(fd, buildid_sec->size)) pr_debug("failed to read buildids, continuing...\n"); } + + free(feat_sec); }; struct perf_header *perf_header__read(int fd) diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h index 821c103..9450763 100644 --- a/tools/perf/util/include/linux/bitmap.h +++ b/tools/perf/util/include/linux/bitmap.h @@ -1,2 +1,3 @@ #include "../../../../include/linux/bitmap.h" #include "../../../../include/asm-generic/bitops/find.h" +#include diff --git a/tools/perf/util/include/linux/ctype.h b/tools/perf/util/include/linux/ctype.h index bae5783..a53d4ee 100644 --- a/tools/perf/util/include/linux/ctype.h +++ b/tools/perf/util/include/linux/ctype.h @@ -1 +1 @@ -#include "../../../../include/linux/ctype.h" +#include "../util.h" diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9de2329..7bd5bda 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -306,6 +306,7 @@ static inline int has_extension(const char *filename, const char *ext) #undef isascii #undef isspace #undef isdigit +#undef isxdigit #undef isalpha #undef isprint #undef isalnum @@ -323,6 +324,8 @@ extern unsigned char sane_ctype[256]; #define isascii(x) (((x) & ~0x7f) == 0) #define isspace(x) sane_istest(x,GIT_SPACE) #define isdigit(x) sane_istest(x,GIT_DIGIT) +#define isxdigit(x) \ + (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G') #define isalpha(x) sane_istest(x,GIT_ALPHA) #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) #define isprint(x) sane_istest(x,GIT_PRINT) -- cgit v0.10.2 From 5d7bdab75cd56d2bdc0986ae5546be3b09fea70a Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Wed, 11 Nov 2009 20:43:03 +1300 Subject: perf tools: Test -fstack-protector-all compiler option for inclusion in CFLAGS Some architectures (e.g. Alpha) do not support the -fstack-protector-all compiler option and the use of the option with -Werror causes the compiler to abort and the build fails. Test that the compiler supports -fstack-protector-all before inclusion in CFLAGS. Signed-off-by: Michael Cree Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091111074302.GA3728@omega> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index b9509b1..e6d4272 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -207,7 +207,7 @@ ifndef PERF_DEBUG CFLAGS_OPTIMIZE = -O6 endif -CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -fstack-protector-all -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) +CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) @@ -259,6 +259,9 @@ PTHREAD_LIBS = -lpthread # explicitly what architecture to check for. Fix this up for yours.. SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ +ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null >/dev/null 2>&1 && echo y"), y) + CFLAGS := $(CFLAGS) -fstack-protector-all +endif ### --- END CONFIGURATION SECTION --- -- cgit v0.10.2 From ce6b5d768c79b9d5dd6345c033bae781d5ca9b8e Mon Sep 17 00:00:00 2001 From: Yong Wang Date: Wed, 11 Nov 2009 15:51:25 +0800 Subject: x86: Mark the thermal init functions __init Mark the thermal init functions __init so that the init memory can be freed. Signed-off-by: Yong Wang LKML-Reference: <20091111075125.GA17900@ywang-moblin2.bj.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7f3cf36..8a73d5c 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -256,7 +256,7 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } -void mcheck_intel_therm_init(void) +void __init mcheck_intel_therm_init(void) { /* * This function is only called on boot CPU. Save the init thermal @@ -268,7 +268,7 @@ void mcheck_intel_therm_init(void) lvtthmr_init = apic_read(APIC_LVTTHMR); } -void intel_init_thermal(struct cpuinfo_x86 *c) +void __init intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); int tm2 = 0; -- cgit v0.10.2 From b18485e7acfe1a634615d1c628ef644c0d58d472 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 12 Nov 2009 00:03:28 +0900 Subject: swiotlb: Remove the swiotlb variable usage POWERPC doesn't expect it to be used. This fixes the linux-next build failure reported by Stephen Rothwell: lib/swiotlb.c: In function 'setup_io_tlb_npages': lib/swiotlb.c:114: error: 'swiotlb' undeclared (first use in this function) Reported-by: Stephen Rothwell Signed-off-by: FUJITA Tomonori Cc: peterz@infradead.org LKML-Reference: <20091112000258F.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index b9e4e20..940f13a 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -9,11 +9,12 @@ extern int swiotlb_force; #ifdef CONFIG_SWIOTLB extern int swiotlb; -extern void pci_swiotlb_init(void); +extern int pci_swiotlb_init(void); #else #define swiotlb 0 -static inline void pci_swiotlb_init(void) +static inline int pci_swiotlb_init(void) { + return 0; } #endif diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index f79870e..0b11bf1 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -125,16 +125,13 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { - /* swiotlb is forced by the boot option */ - int use_swiotlb = swiotlb; #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #else dma_ops = &nommu_dma_ops; #endif - pci_swiotlb_init(); - if (use_swiotlb) + if (pci_swiotlb_init()) return; gart_iommu_hole_init(); diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 17ce422..a6e5d0f 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -42,16 +42,27 @@ static struct dma_map_ops swiotlb_dma_ops = { .dma_supported = NULL, }; -void __init pci_swiotlb_init(void) +/* + * pci_swiotlb_init - initialize swiotlb if necessary + * + * This returns non-zero if we are forced to use swiotlb (by the boot + * option). + */ +int __init pci_swiotlb_init(void) { /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 if (!no_iommu && max_pfn > MAX_DMA32_PFN) swiotlb = 1; #endif + if (swiotlb_force) + swiotlb = 1; + if (swiotlb) { swiotlb_init(0); dma_ops = &swiotlb_dma_ops; } else dma_ops = &nommu_dma_ops; + + return swiotlb_force; } diff --git a/lib/swiotlb.c b/lib/swiotlb.c index e6755a0..795472d 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -109,10 +109,9 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) { + if (!strcmp(str, "force")) swiotlb_force = 1; - swiotlb = 1; - } + return 1; } __setup("swiotlb=", setup_io_tlb_npages); -- cgit v0.10.2 From e657ea17ef2d7f364e5c2625157f6cc0584ac7ad Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Nov 2009 09:31:07 -0800 Subject: pcmcia: fix printk formats Fix printk format warnings on sizeof() [size_t] arguments. drivers/char/pcmcia/cm4040_cs.c:267: warning: format '%lu' expects type 'long unsigned int', but argument 5 has type 'size_t' drivers/char/pcmcia/cm4040_cs.c:272: warning: format '%lu' expects type 'long unsigned int', but argument 5 has type 'size_t' CC: Harald Welte Signed-off-by: Randy Dunlap Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index 0001ad2..38790db 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -264,12 +264,12 @@ static ssize_t cm4040_read(struct file *filp, char __user *buf, bytes_to_read = 5 + le32_to_cpu(*(__le32 *)&dev->r_buf[1]); - DEBUGP(6, dev, "BytesToRead=%lu\n", bytes_to_read); + DEBUGP(6, dev, "BytesToRead=%zu\n", bytes_to_read); min_bytes_to_read = min(count, bytes_to_read + 5); min_bytes_to_read = min_t(size_t, min_bytes_to_read, READ_WRITE_BUFFER_SIZE); - DEBUGP(6, dev, "Min=%lu\n", min_bytes_to_read); + DEBUGP(6, dev, "Min=%zu\n", min_bytes_to_read); for (i = 0; i < (min_bytes_to_read-5); i++) { rc = wait_for_bulk_in_ready(dev); -- cgit v0.10.2 From b285fab4185a9b3db953726f0dd9d343a6e389db Mon Sep 17 00:00:00 2001 From: Avi Cohen Stuart Date: Tue, 10 Nov 2009 22:43:46 +0100 Subject: pcmcia: correct handling for Zoomed Video registers in topic.h Fix handling of Zoomed Video Registers in the Topic pcmcia controller ( http://bugzilla.kernel.org/show_bug.cgi?id=14581 ). The information has been retrieved from the Topic manual which can be obtained from Toshiba. The Zoomed Video is used with PCMCIA Cards like the Margi DVD-to-Go. [linux@dominikbrodowski.net: whitespace & commit message fix] Signed-off-by: Avi Cohen Stuart Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/topic.h b/drivers/pcmcia/topic.h index edccfa5..615a45a 100644 --- a/drivers/pcmcia/topic.h +++ b/drivers/pcmcia/topic.h @@ -114,22 +114,17 @@ static void topic97_zoom_video(struct pcmcia_socket *sock, int onoff) reg_zv |= TOPIC97_ZV_CONTROL_ENABLE; config_writeb(socket, TOPIC97_ZOOM_VIDEO_CONTROL, reg_zv); - reg = config_readb(socket, TOPIC97_MISC2); - reg |= TOPIC97_MISC2_ZV_ENABLE; - config_writeb(socket, TOPIC97_MISC2, reg); - - /* not sure this is needed, doc is unclear */ -#if 0 reg = config_readb(socket, TOPIC97_AUDIO_VIDEO_SWITCH); reg |= TOPIC97_AVS_AUDIO_CONTROL | TOPIC97_AVS_VIDEO_CONTROL; config_writeb(socket, TOPIC97_AUDIO_VIDEO_SWITCH, reg); -#endif - } - else { + } else { reg_zv &= ~TOPIC97_ZV_CONTROL_ENABLE; config_writeb(socket, TOPIC97_ZOOM_VIDEO_CONTROL, reg_zv); - } + reg = config_readb(socket, TOPIC97_AUDIO_VIDEO_SWITCH); + reg &= ~(TOPIC97_AVS_AUDIO_CONTROL | TOPIC97_AVS_VIDEO_CONTROL); + config_writeb(socket, TOPIC97_AUDIO_VIDEO_SWITCH, reg); + } } static int topic97_override(struct yenta_socket *socket) -- cgit v0.10.2 From 0e0fc1c23e04c15e814763f2b366e92d87d8b95d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 Nov 2009 11:28:06 -0800 Subject: rcu: Mark init-time-only rcu_bootup_announce() as __init Because rcu_bootup_announce() is used only at boot time, mark it as __init, presumably so that its memory can be reclaimed. Suggested-by: Joe Perches Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <20091111192806.GA10073@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 52075da..5ca2d26 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); /* * Tell them what RCU they are running. */ -static void rcu_bootup_announce(void) +static void __init rcu_bootup_announce(void) { printk(KERN_INFO "Experimental preemptable hierarchical RCU implementation.\n"); @@ -481,7 +481,7 @@ void exit_rcu(void) /* * Tell them what RCU they are running. */ -static void rcu_bootup_announce(void) +static void __init rcu_bootup_announce(void) { printk(KERN_INFO "Hierarchical RCU implementation.\n"); } -- cgit v0.10.2 From a646365cc330b5aaf4452c91f61b1e0d1acf68d0 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 11 Nov 2009 22:26:35 +0100 Subject: tracing: Fix return value of tracing_stats_read() The function tracing_stats_read() mistakenly returns ENOMEM instead of the negative value -ENOMEM. Signed-off-by: Roel Kluin LKML-Reference: <4AFB2C0B.50605@gmail.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b20d3ec..03c7fd5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3730,7 +3730,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf, s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) - return ENOMEM; + return -ENOMEM; trace_seq_init(s); -- cgit v0.10.2 From a6f0eb6adc42e5eed3f35af99c61c0e411b16f8e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Nov 2009 17:14:07 -0500 Subject: ring-buffer: Add multiple iterations between benchmark timestamps The ring_buffer_benchmark does a gettimeofday after every write to the ring buffer in its measurements. This adds the overhead of the call to gettimeofday to the measurements and does not give an accurate picture of the length of time it takes to record a trace. This was first noticed with perf top: ------------------------------------------------------------------------------ PerfTop: 679 irqs/sec kernel:99.9% [1000Hz cpu-clock-msecs], (all, 4 CPUs) ------------------------------------------------------------------------------ samples pcnt kernel function _______ _____ _______________ 1673.00 - 27.8% : trace_clock_local 806.00 - 13.4% : do_gettimeofday 590.00 - 9.8% : rb_reserve_next_event 554.00 - 9.2% : native_read_tsc 431.00 - 7.2% : ring_buffer_lock_reserve 365.00 - 6.1% : __rb_reserve_next 355.00 - 5.9% : rb_end_commit 322.00 - 5.4% : getnstimeofday 268.00 - 4.5% : ring_buffer_unlock_commit 262.00 - 4.4% : ring_buffer_producer_thread [ring_buffer_benchmark] 113.00 - 1.9% : read_tsc 91.00 - 1.5% : debug_smp_processor_id 69.00 - 1.1% : trace_recursive_unlock 66.00 - 1.1% : ring_buffer_event_data 25.00 - 0.4% : _spin_unlock_irq And the length of each write to the ring buffer measured at 310ns. This patch adds a new module parameter called "write_interval" which is defaulted to 50. This is the number of writes performed between timestamps. After this patch perf top shows: ------------------------------------------------------------------------------ PerfTop: 244 irqs/sec kernel:100.0% [1000Hz cpu-clock-msecs], (all, 4 CPUs) ------------------------------------------------------------------------------ samples pcnt kernel function _______ _____ _______________ 2842.00 - 40.4% : trace_clock_local 1043.00 - 14.8% : rb_reserve_next_event 784.00 - 11.1% : ring_buffer_lock_reserve 600.00 - 8.5% : __rb_reserve_next 579.00 - 8.2% : rb_end_commit 440.00 - 6.3% : ring_buffer_unlock_commit 290.00 - 4.1% : ring_buffer_producer_thread [ring_buffer_benchmark] 155.00 - 2.2% : debug_smp_processor_id 117.00 - 1.7% : trace_recursive_unlock 103.00 - 1.5% : ring_buffer_event_data 28.00 - 0.4% : do_gettimeofday 22.00 - 0.3% : _spin_unlock_irq 14.00 - 0.2% : native_read_tsc 11.00 - 0.2% : getnstimeofday do_gettimeofday dropped from 13% usage to a mere 0.4%! (using the default 50 interval) The measurement for each timestamp went from 310ns to 210ns. That's 100ns (1/3rd) overhead that the gettimeofday call was introducing. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 573d3cc..70df73e 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -35,6 +35,10 @@ static int disable_reader; module_param(disable_reader, uint, 0644); MODULE_PARM_DESC(disable_reader, "only run producer"); +static int write_iteration = 50; +module_param(write_iteration, uint, 0644); +MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); + static int read_events; static int kill_test; @@ -208,15 +212,18 @@ static void ring_buffer_producer(void) do { struct ring_buffer_event *event; int *entry; - - event = ring_buffer_lock_reserve(buffer, 10); - if (!event) { - missed++; - } else { - hit++; - entry = ring_buffer_event_data(event); - *entry = smp_processor_id(); - ring_buffer_unlock_commit(buffer, event); + int i; + + for (i = 0; i < write_iteration; i++) { + event = ring_buffer_lock_reserve(buffer, 10); + if (!event) { + missed++; + } else { + hit++; + entry = ring_buffer_event_data(event); + *entry = smp_processor_id(); + ring_buffer_unlock_commit(buffer, event); + } } do_gettimeofday(&end_tv); -- cgit v0.10.2 From 8b2a5dac7859dd1954095fce8b6445c3ceb36ef6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 Nov 2009 19:36:03 -0500 Subject: tracing: do not disable interrupts for trace_clock_local Disabling interrupts in trace_clock_local takes quite a performance hit to the recording of traces. Using perf top we see: ------------------------------------------------------------------------------ PerfTop: 244 irqs/sec kernel:100.0% [1000Hz cpu-clock-msecs], (all, 4 CPUs) ------------------------------------------------------------------------------ samples pcnt kernel function _______ _____ _______________ 2842.00 - 40.4% : trace_clock_local 1043.00 - 14.8% : rb_reserve_next_event 784.00 - 11.1% : ring_buffer_lock_reserve 600.00 - 8.5% : __rb_reserve_next 579.00 - 8.2% : rb_end_commit 440.00 - 6.3% : ring_buffer_unlock_commit 290.00 - 4.1% : ring_buffer_producer_thread [ring_buffer_benchmark] 155.00 - 2.2% : debug_smp_processor_id 117.00 - 1.7% : trace_recursive_unlock 103.00 - 1.5% : ring_buffer_event_data 28.00 - 0.4% : do_gettimeofday 22.00 - 0.3% : _spin_unlock_irq 14.00 - 0.2% : native_read_tsc 11.00 - 0.2% : getnstimeofday Where trace_clock_local is 40% of the tracing, and the time for recording a trace according to ring_buffer_benchmark is 210ns. After converting the interrupts to preemption disabling we have from perf top: ------------------------------------------------------------------------------ PerfTop: 1084 irqs/sec kernel:99.9% [1000Hz cpu-clock-msecs], (all, 4 CPUs) ------------------------------------------------------------------------------ samples pcnt kernel function _______ _____ _______________ 1277.00 - 16.8% : native_read_tsc 1148.00 - 15.1% : rb_reserve_next_event 896.00 - 11.8% : ring_buffer_lock_reserve 688.00 - 9.1% : __rb_reserve_next 664.00 - 8.8% : rb_end_commit 563.00 - 7.4% : ring_buffer_unlock_commit 508.00 - 6.7% : _spin_unlock_irq 365.00 - 4.8% : debug_smp_processor_id 321.00 - 4.2% : trace_clock_local 303.00 - 4.0% : ring_buffer_producer_thread [ring_buffer_benchmark] 273.00 - 3.6% : native_sched_clock 122.00 - 1.6% : trace_recursive_unlock 113.00 - 1.5% : sched_clock 101.00 - 1.3% : ring_buffer_event_data 53.00 - 0.7% : tick_nohz_stop_sched_tick Where trace_clock_local drops from 40% to only taking 4% of the total time. The trace time also goes from 210ns down to 179ns (31ns). I talked with Peter Zijlstra about the impact that sched_clock may have without having interrupts disabled, and he told me that if a timer interrupt comes in, sched_clock may report a wrong time. Balancing a seldom incorrect timestamp with a 15% performance boost, I'll take the performance boost. Acked-by: Peter Zijlstra Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 20c5f92..878c03f 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -20,6 +20,8 @@ #include #include +#include "trace.h" + /* * trace_clock_local(): the simplest and least coherent tracing clock. * @@ -28,17 +30,17 @@ */ u64 notrace trace_clock_local(void) { - unsigned long flags; u64 clock; + int resched; /* * sched_clock() is an architecture implemented, fast, scalable, * lockless clock. It is not guaranteed to be coherent across * CPUs, nor across CPU idle events. */ - raw_local_irq_save(flags); + resched = ftrace_preempt_disable(); clock = sched_clock(); - raw_local_irq_restore(flags); + ftrace_preempt_enable(resched); return clock; } -- cgit v0.10.2 From cffd377e5879ea58522224a785a083f201afd80e Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 12 Nov 2009 15:52:40 +0900 Subject: x86, mce: Fix __init annotations The intel_init_thermal() is called from resume path, so it cannot be marked as __init. OTOH mce_banks_init() is only called from __mcheck_cpu_cap_init() which is marked as __cpuinit, so it can be also marked as __cpuinit. Signed-off-by: Hidetoshi Seto Acked-by: Yong Wang LKML-Reference: <4AFBB0B8.2070501@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0d41020..5f277ca 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1201,7 +1201,7 @@ int mce_notify_irq(void) } EXPORT_SYMBOL_GPL(mce_notify_irq); -static int mce_banks_init(void) +static int __cpuinit __mcheck_cpu_mce_banks_init(void) { int i; @@ -1242,7 +1242,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void) WARN_ON(banks != 0 && b != banks); banks = b; if (!mce_banks) { - int err = mce_banks_init(); + int err = __mcheck_cpu_mce_banks_init(); if (err) return err; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 8a73d5c..4fef985 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -268,7 +268,7 @@ void __init mcheck_intel_therm_init(void) lvtthmr_init = apic_read(APIC_LVTTHMR); } -void __init intel_init_thermal(struct cpuinfo_x86 *c) +void intel_init_thermal(struct cpuinfo_x86 *c) { unsigned int cpu = smp_processor_id(); int tm2 = 0; -- cgit v0.10.2 From db48cccc7c709ccfa7cb4ac702bc27c216bffee7 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Thu, 12 Nov 2009 11:25:34 +0900 Subject: perf_event, x86: Annotate init functions and data Annotate init functions and data with __init and __initconst. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <4AFB721E.8070203@ct.jp.nec.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2e20bca..bd87430 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -245,7 +245,7 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -static const u64 nehalem_hw_cache_event_ids +static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -336,7 +336,7 @@ static const u64 nehalem_hw_cache_event_ids }, }; -static const u64 core2_hw_cache_event_ids +static __initconst u64 core2_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -427,7 +427,7 @@ static const u64 core2_hw_cache_event_ids }, }; -static const u64 atom_hw_cache_event_ids +static __initconst u64 atom_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -536,7 +536,7 @@ static u64 intel_pmu_raw_event(u64 hw_event) return hw_event & CORE_EVNTSEL_MASK; } -static const u64 amd_hw_cache_event_ids +static __initconst u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -1964,7 +1964,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { .priority = 1 }; -static struct x86_pmu p6_pmu = { +static __initconst struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = p6_pmu_handle_irq, .disable_all = p6_pmu_disable_all, @@ -1992,7 +1992,7 @@ static struct x86_pmu p6_pmu = { .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu intel_pmu = { +static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, .disable_all = intel_pmu_disable_all, @@ -2016,7 +2016,7 @@ static struct x86_pmu intel_pmu = { .get_event_idx = intel_get_event_idx, }; -static struct x86_pmu amd_pmu = { +static __initconst struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = amd_pmu_handle_irq, .disable_all = amd_pmu_disable_all, @@ -2037,7 +2037,7 @@ static struct x86_pmu amd_pmu = { .get_event_idx = gen_get_event_idx, }; -static int p6_pmu_init(void) +static __init int p6_pmu_init(void) { switch (boot_cpu_data.x86_model) { case 1: @@ -2071,7 +2071,7 @@ static int p6_pmu_init(void) return 0; } -static int intel_pmu_init(void) +static __init int intel_pmu_init(void) { union cpuid10_edx edx; union cpuid10_eax eax; @@ -2144,7 +2144,7 @@ static int intel_pmu_init(void) return 0; } -static int amd_pmu_init(void) +static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) -- cgit v0.10.2 From 055a00865dcfc8e61f3cbefbb879c9577bd36ae5 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 12 Nov 2009 11:07:44 +0100 Subject: sched: Fix/add missing update_rq_clock() calls kthread_bind(), migrate_task() and sched_fork were missing updates, and try_to_wake_up() was updating after having already used the stale clock. Aside from preventing potential latency hits, there' a side benefit in that early boot printk time stamps become monotonic. Signed-off-by: Mike Galbraith Acked-by: Peter Zijlstra LKML-Reference: <1258020464.6491.2.camel@marge.simson.net> Signed-off-by: Ingo Molnar LKML-Reference: diff --git a/kernel/sched.c b/kernel/sched.c index 3c11ae0..701eca4 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2017,6 +2017,7 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) } spin_lock_irqsave(&rq->lock, flags); + update_rq_clock(rq); set_task_cpu(p, cpu); p->cpus_allowed = cpumask_of_cpu(cpu); p->rt.nr_cpus_allowed = 1; @@ -2115,6 +2116,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) * it is sufficient to simply update the task's cpu field. */ if (!p->se.on_rq && !task_running(rq, p)) { + update_rq_clock(rq); set_task_cpu(p, dest_cpu); return 0; } @@ -2376,14 +2378,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, task_rq_unlock(rq, &flags); cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); - if (cpu != orig_cpu) + if (cpu != orig_cpu) { + local_irq_save(flags); + rq = cpu_rq(cpu); + update_rq_clock(rq); set_task_cpu(p, cpu); - + local_irq_restore(flags); + } rq = task_rq_lock(p, &flags); - if (rq != orig_rq) - update_rq_clock(rq); - WARN_ON(p->state != TASK_WAKING); cpu = task_cpu(p); @@ -2545,6 +2548,7 @@ static void __sched_fork(struct task_struct *p) void sched_fork(struct task_struct *p, int clone_flags) { int cpu = get_cpu(); + unsigned long flags; __sched_fork(p); @@ -2581,7 +2585,10 @@ void sched_fork(struct task_struct *p, int clone_flags) #ifdef CONFIG_SMP cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); #endif + local_irq_save(flags); + update_rq_clock(cpu_rq(cpu)); set_task_cpu(p, cpu); + local_irq_restore(flags); #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) if (likely(sched_info_on())) -- cgit v0.10.2 From 761b1d26df542fd5eb348837351e4d2f3bc7bffe Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 12 Nov 2009 13:33:45 +0900 Subject: sched: Fix granularity of task_u/stime() Originally task_s/utime() were designed to return clock_t but later changed to return cputime_t by following commit: commit efe567fc8281661524ffa75477a7c4ca9b466c63 Author: Christian Borntraeger Date: Thu Aug 23 15:18:02 2007 +0200 It only changed the type of return value, but not the implementation. As the result the granularity of task_s/utime() is still that of clock_t, not that of cputime_t. So using task_s/utime() in __exit_signal() makes values accumulated to the signal struct to be rounded and coarse grained. This patch removes casts to clock_t in task_u/stime(), to keep granularity of cputime_t over the calculation. v2: Use div_u64() to avoid error "undefined reference to `__udivdi3`" on some 32bit systems. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: xiyou.wangcong@gmail.com Cc: Spencer Candland Cc: Oleg Nesterov Cc: Stanislaw Gruszka LKML-Reference: <4AFB9029.9000208@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 43e61fa..ab9a034 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5156,41 +5156,45 @@ cputime_t task_stime(struct task_struct *p) return p->stime; } #else + +#ifndef nsecs_to_cputime +# define nsecs_to_cputime(__nsecs) \ + msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC)) +#endif + cputime_t task_utime(struct task_struct *p) { - clock_t utime = cputime_to_clock_t(p->utime), - total = utime + cputime_to_clock_t(p->stime); + cputime_t utime = p->utime, total = utime + p->stime; u64 temp; /* * Use CFS's precise accounting: */ - temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); + temp = (u64)nsecs_to_cputime(p->se.sum_exec_runtime); if (total) { temp *= utime; do_div(temp, total); } - utime = (clock_t)temp; + utime = (cputime_t)temp; - p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); + p->prev_utime = max(p->prev_utime, utime); return p->prev_utime; } cputime_t task_stime(struct task_struct *p) { - clock_t stime; + cputime_t stime; /* * Use CFS's precise accounting. (we subtract utime from * the total, to make sure the total observed by userspace * grows monotonically - apps rely on that): */ - stime = nsec_to_clock_t(p->se.sum_exec_runtime) - - cputime_to_clock_t(task_utime(p)); + stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p); if (stime >= 0) - p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); + p->prev_stime = max(p->prev_stime, stime); return p->prev_stime; } -- cgit v0.10.2 From 15cd8812ab2ce62a2f779e93a8398bdad752291a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 12 Nov 2009 18:15:43 -0500 Subject: x86: Remove the CPU cache size printk's They aren't really useful, and they pollute the dmesg output a lot (especially on machines with many cores). Also the same information can be trivially found out from userspace. Reported-by: Mike Travis Signed-off-by: Dave Jones Acked-by: H. Peter Anvin Cc: Andi Kleen Cc: Heiko Carstens Cc: Roland Dreier Cc: Randy Dunlap Cc: Tejun Heo Cc: Greg Kroah-Hartman Cc: Yinghai Lu Cc: David Rientjes Cc: Steven Rostedt Cc: Rusty Russell Cc: Hidetoshi Seto Cc: Jack Steiner Cc: Frederic Weisbecker LKML-Reference: <20091112231542.GA7129@redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 804c40e..0df4c2b 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -488,22 +488,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) #endif } - if (trace) - printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if (l1i) - printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); - - if (l1d) - printk(KERN_CONT ", L1 D cache: %dK\n", l1d); - else - printk(KERN_CONT "\n"); - - if (l2) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - - if (l3) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; -- cgit v0.10.2 From a50bde5130f65733142b32975616427d0ea50856 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Nov 2009 15:55:28 +0100 Subject: sched: Cleanup select_task_rq_fair() Clean up the new affine to idle sibling bits while trying to grok them. Should not have any function differences. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091112145610.832503781@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e4d4483..a32df15 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1319,6 +1319,41 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) } /* + * Try and locate an idle CPU in the sched_domain. + */ +static int +select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) +{ + int cpu = smp_processor_id(); + int prev_cpu = task_cpu(p); + int i; + + /* + * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE + * test in select_task_rq_fair) and the prev_cpu is idle then that's + * always a better target than the current cpu. + */ + if (target == cpu) { + if (!cpu_rq(prev_cpu)->cfs.nr_running) + target = prev_cpu; + } + + /* + * Otherwise, iterate the domain and find an elegible idle cpu. + */ + if (target == -1 || target == cpu) { + for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { + if (!cpu_rq(i)->cfs.nr_running) { + target = i; + break; + } + } + } + + return target; +} + +/* * sched_balance_self: balance the current task (running on cpu) in domains * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and * SD_BALANCE_EXEC. @@ -1373,36 +1408,30 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag } if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) { - int candidate = -1, i; + int target = -1; + /* + * If both cpu and prev_cpu are part of this domain, + * cpu is a valid SD_WAKE_AFFINE target. + */ if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) - candidate = cpu; + target = cpu; /* - * Check for an idle shared cache. + * If there's an idle sibling in this domain, make that + * the wake_affine target instead of the current cpu. + * + * XXX: should we possibly do this outside of + * WAKE_AFFINE, in case the shared cache domain is + * smaller than the WAKE_AFFINE domain? */ - if (tmp->flags & SD_PREFER_SIBLING) { - if (candidate == cpu) { - if (!cpu_rq(prev_cpu)->cfs.nr_running) - candidate = prev_cpu; - } - - if (candidate == -1 || candidate == cpu) { - for_each_cpu(i, sched_domain_span(tmp)) { - if (!cpumask_test_cpu(i, &p->cpus_allowed)) - continue; - if (!cpu_rq(i)->cfs.nr_running) { - candidate = i; - break; - } - } - } - } + if (tmp->flags & SD_PREFER_SIBLING) + target = select_idle_sibling(p, tmp, target); - if (candidate >= 0) { + if (target >= 0) { affine_sd = tmp; want_affine = 0; - cpu = candidate; + cpu = target; } } -- cgit v0.10.2 From fe3bcfe1f6c1fc4ea7706ac2d05e579fd9092682 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 Nov 2009 15:55:29 +0100 Subject: sched: More generic WAKE_AFFINE vs select_idle_sibling() Instead of only considering SD_WAKE_AFFINE | SD_PREFER_SIBLING domains also allow all SD_PREFER_SIBLING domains below a SD_WAKE_AFFINE domain to change the affinity target. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: <20091112145610.909723612@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a32df15..f28a267 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1333,20 +1333,16 @@ select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) * test in select_task_rq_fair) and the prev_cpu is idle then that's * always a better target than the current cpu. */ - if (target == cpu) { - if (!cpu_rq(prev_cpu)->cfs.nr_running) - target = prev_cpu; - } + if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running) + return prev_cpu; /* * Otherwise, iterate the domain and find an elegible idle cpu. */ - if (target == -1 || target == cpu) { - for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { - if (!cpu_rq(i)->cfs.nr_running) { - target = i; - break; - } + for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { + if (!cpu_rq(i)->cfs.nr_running) { + target = i; + break; } } @@ -1407,7 +1403,12 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag want_sd = 0; } - if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) { + /* + * While iterating the domains looking for a spanning + * WAKE_AFFINE domain, adjust the affine target to any idle cpu + * in cache sharing domains along the way. + */ + if (want_affine) { int target = -1; /* @@ -1420,17 +1421,15 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag /* * If there's an idle sibling in this domain, make that * the wake_affine target instead of the current cpu. - * - * XXX: should we possibly do this outside of - * WAKE_AFFINE, in case the shared cache domain is - * smaller than the WAKE_AFFINE domain? */ if (tmp->flags & SD_PREFER_SIBLING) target = select_idle_sibling(p, tmp, target); if (target >= 0) { - affine_sd = tmp; - want_affine = 0; + if (tmp->flags & SD_WAKE_AFFINE) { + affine_sd = tmp; + want_affine = 0; + } cpu = target; } } -- cgit v0.10.2 From b32e9eb6ad29572b4451847d0e8227c9be2b6d69 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Nov 2009 22:35:03 -0800 Subject: rcu: Accelerate callback processing on CPUs not detecting GP end An earlier fix for a race resulted in a situation where the CPUs other than the CPU that detected the end of the grace period would not process their callbacks until the next grace period started. This means that these other CPUs would unnecessarily demand that an extra grace period be started. This patch eliminates this extra grace period and speeds callback processing by propagating rsp->completed to the rcu_node structures in the case where the CPU detecting the end of the grace period sees no reason to start a new grace period. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1258094104417-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d802419..b4efb9e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -676,7 +676,23 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) struct rcu_node *rnp = rcu_get_root(rsp); if (!cpu_needs_another_gp(rsp, rdp)) { - spin_unlock_irqrestore(&rnp->lock, flags); + if (rnp->completed == rsp->completed) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + + /* + * Propagate new ->completed value to rcu_node structures + * so that other CPUs don't have to wait until the start + * of the next grace period to process their callbacks. + */ + rcu_for_each_node_breadth_first(rsp, rnp) { + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->completed = rsp->completed; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } + local_irq_restore(flags); return; } -- cgit v0.10.2 From 8e9aa8f067d2dcd9457980ced618e1cffbcfba46 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Nov 2009 22:35:04 -0800 Subject: rcu: Simplify association of forced quiescent states with grace periods The force_quiescent_state() function also took a snapshot of the ->completed field, which was as obnoxious as it was in rcu_sched_qs() and friends. So snapshot ->gpnum-1. Also, since the dyntick_record_completed() and dyntick_recall_completed() functions are now simple assignments that are independent of CONFIG_NO_HZ, and since their names are now misleading, get rid of them. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12580941042308-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b4efb9e..3df0438 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -178,29 +178,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp) return &rsp->node[0]; } -/* - * Record the specified "completed" value, which is later used to validate - * dynticks counter manipulations and CPU-offline checks. Specify - * "rsp->completed - 1" to unconditionally invalidate any future dynticks - * manipulations and CPU-offline checks. Such invalidation is useful at - * the beginning of a grace period. - */ -static void dyntick_record_completed(struct rcu_state *rsp, long comp) -{ - rsp->completed_fqs = comp; -} - #ifdef CONFIG_SMP /* - * Recall the previously recorded value of the completion for dynticks. - */ -static long dyntick_recall_completed(struct rcu_state *rsp) -{ - return rsp->completed_fqs; -} - -/* * If the specified CPU is offline, tell the caller that it is in * a quiescent state. Otherwise, whack it with a reschedule IPI. * Grace periods can end up waiting on an offline CPU when that @@ -702,7 +682,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; record_gp_stall_check_time(rsp); - dyntick_record_completed(rsp, rsp->completed - 1); /* Special-case the common single-level case. */ if (NUM_RCU_NODES == 1) { @@ -1214,7 +1193,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) goto unlock_ret; /* no emergency and done recently. */ rsp->n_force_qs++; spin_lock(&rnp->lock); - lastcomp = rsp->completed; + lastcomp = rsp->gpnum - 1; signaled = rsp->signaled; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; if (lastcomp == rsp->gpnum) { @@ -1248,7 +1227,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) if (lastcomp == rsp->completed && rsp->signaled == signaled) { rsp->signaled = RCU_FORCE_QS; - dyntick_record_completed(rsp, lastcomp); + rsp->completed_fqs = lastcomp; forcenow = signaled == RCU_SAVE_COMPLETED; } spin_unlock(&rnp->lock); @@ -1259,7 +1238,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) case RCU_FORCE_QS: /* Check dyntick-idle state, send IPI to laggarts. */ - if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp), + if (rcu_process_dyntick(rsp, rsp->completed_fqs, rcu_implicit_dynticks_qs)) goto unlock_ret; -- cgit v0.10.2 From 67178767b936fb47a3a5e88097cff41ccbda7acb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Nov 2009 10:06:34 +0100 Subject: tracing: Rename 'lockdep' event subsystem into 'lock' Lockdep events subsystem gathers various locking related events such as a request, release, contention or acquisition of a lock. The name of this event subsystem is a bit of a misnomer since these events are not quite related to lockdep but more generally to locking, ie: these events are not reporting lock dependencies or possible deadlock scenario but pure locking events. Hence this rename. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Acked-by: Hitoshi Mitake Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras Cc: Steven Rostedt Cc: Li Zefan LKML-Reference: <1258103194-843-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h new file mode 100644 index 0000000..a870ba1 --- /dev/null +++ b/include/trace/events/lock.h @@ -0,0 +1,96 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lock + +#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_LOCK_H + +#include +#include + +#ifdef CONFIG_LOCKDEP + +TRACE_EVENT(lock_acquire, + + TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + int trylock, int read, int check, + struct lockdep_map *next_lock, unsigned long ip), + + TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), + + TP_STRUCT__entry( + __field(unsigned int, flags) + __string(name, lock->name) + ), + + TP_fast_assign( + __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); + __assign_str(name, lock->name); + ), + + TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", + (__entry->flags & 2) ? "read " : "", + __get_str(name)) +); + +TRACE_EVENT(lock_release, + + TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), + + TP_ARGS(lock, nested, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +#ifdef CONFIG_LOCK_STAT + +TRACE_EVENT(lock_contended, + + TP_PROTO(struct lockdep_map *lock, unsigned long ip), + + TP_ARGS(lock, ip), + + TP_STRUCT__entry( + __string(name, lock->name) + ), + + TP_fast_assign( + __assign_str(name, lock->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(lock_acquired, + TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), + + TP_ARGS(lock, ip, waittime), + + TP_STRUCT__entry( + __string(name, lock->name) + __field(unsigned long, wait_usec) + __field(unsigned long, wait_nsec_rem) + ), + TP_fast_assign( + __assign_str(name, lock->name); + __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); + __entry->wait_usec = (unsigned long) waittime; + ), + TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, + __entry->wait_nsec_rem) +); + +#endif +#endif + +#endif /* _TRACE_LOCK_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h deleted file mode 100644 index bcf1d20..0000000 --- a/include/trace/events/lockdep.h +++ /dev/null @@ -1,96 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - -#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_LOCKDEP_H - -#include -#include - -#ifdef CONFIG_LOCKDEP - -TRACE_EVENT(lock_acquire, - - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, - int trylock, int read, int check, - struct lockdep_map *next_lock, unsigned long ip), - - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - - TP_STRUCT__entry( - __field(unsigned int, flags) - __string(name, lock->name) - ), - - TP_fast_assign( - __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0); - __assign_str(name, lock->name); - ), - - TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "", - (__entry->flags & 2) ? "read " : "", - __get_str(name)) -); - -TRACE_EVENT(lock_release, - - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - - TP_ARGS(lock, nested, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -#ifdef CONFIG_LOCK_STAT - -TRACE_EVENT(lock_contended, - - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - - TP_ARGS(lock, ip), - - TP_STRUCT__entry( - __string(name, lock->name) - ), - - TP_fast_assign( - __assign_str(name, lock->name); - ), - - TP_printk("%s", __get_str(name)) -); - -TRACE_EVENT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime), - - TP_ARGS(lock, ip, waittime), - - TP_STRUCT__entry( - __string(name, lock->name) - __field(unsigned long, wait_usec) - __field(unsigned long, wait_nsec_rem) - ), - TP_fast_assign( - __assign_str(name, lock->name); - __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC); - __entry->wait_usec = (unsigned long) waittime; - ), - TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec, - __entry->wait_nsec_rem) -); - -#endif -#endif - -#endif /* _TRACE_LOCKDEP_H */ - -/* This part must be outside protection */ -#include diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 9af5672..f5dcd36 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -49,7 +49,7 @@ #include "lockdep_internals.h" #define CREATE_TRACE_POINTS -#include +#include #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; -- cgit v0.10.2 From 6beb000923882f6204ea2cfcd932e568e900803f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Nov 2009 15:21:34 +0000 Subject: locking: Make inlining decision Kconfig based commit 892a7c67 (locking: Allow arch-inlined spinlocks) implements the selection of which lock functions are inlined based on defines in arch/.../spinlock.h: #define __always_inline__LOCK_FUNCTION Despite of the name __always_inline__* the lock functions can be built out of line depending on config options. Also if the arch does not set some inline defines the generic code might set them; again depending on config options. This makes it unnecessary hard to figure out when and which lock functions are inlined. Aside of that it makes it way harder and messier for -rt to manipulate the lock functions. Convert the inlining decision to CONFIG switches. Each lock function is inlined depending on CONFIG_INLINE_*. The configs implement the existing dependencies. The architecture code can select ARCH_INLINE_* to signal that it wants the corresponding lock function inlined. ARCH_INLINE_* is necessary as Kconfig ignores "depends on" restrictions when a config element is selected. No functional change. Signed-off-by: Thomas Gleixner LKML-Reference: <20091109151428.504477141@linutronix.de> Acked-by: Heiko Carstens Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 43c0aca..16c6730 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -95,6 +95,34 @@ config S390 select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE select HAVE_PERF_EVENTS + select ARCH_INLINE_SPIN_TRYLOCK + select ARCH_INLINE_SPIN_TRYLOCK_BH + select ARCH_INLINE_SPIN_LOCK + select ARCH_INLINE_SPIN_LOCK_BH + select ARCH_INLINE_SPIN_LOCK_IRQ + select ARCH_INLINE_SPIN_LOCK_IRQSAVE + select ARCH_INLINE_SPIN_UNLOCK + select ARCH_INLINE_SPIN_UNLOCK_BH + select ARCH_INLINE_SPIN_UNLOCK_IRQ + select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + select ARCH_INLINE_READ_TRYLOCK + select ARCH_INLINE_READ_LOCK + select ARCH_INLINE_READ_LOCK_BH + select ARCH_INLINE_READ_LOCK_IRQ + select ARCH_INLINE_READ_LOCK_IRQSAVE + select ARCH_INLINE_READ_UNLOCK + select ARCH_INLINE_READ_UNLOCK_BH + select ARCH_INLINE_READ_UNLOCK_IRQ + select ARCH_INLINE_READ_UNLOCK_IRQRESTORE + select ARCH_INLINE_WRITE_TRYLOCK + select ARCH_INLINE_WRITE_LOCK + select ARCH_INLINE_WRITE_LOCK_BH + select ARCH_INLINE_WRITE_LOCK_IRQ + select ARCH_INLINE_WRITE_LOCK_IRQSAVE + select ARCH_INLINE_WRITE_UNLOCK + select ARCH_INLINE_WRITE_UNLOCK_BH + select ARCH_INLINE_WRITE_UNLOCK_IRQ + select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE config SCHED_OMIT_FRAME_POINTER bool diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 41ce686..c9af0d19 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -191,33 +191,4 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() -#define __always_inline__spin_lock -#define __always_inline__read_lock -#define __always_inline__write_lock -#define __always_inline__spin_lock_bh -#define __always_inline__read_lock_bh -#define __always_inline__write_lock_bh -#define __always_inline__spin_lock_irq -#define __always_inline__read_lock_irq -#define __always_inline__write_lock_irq -#define __always_inline__spin_lock_irqsave -#define __always_inline__read_lock_irqsave -#define __always_inline__write_lock_irqsave -#define __always_inline__spin_trylock -#define __always_inline__read_trylock -#define __always_inline__write_trylock -#define __always_inline__spin_trylock_bh -#define __always_inline__spin_unlock -#define __always_inline__read_unlock -#define __always_inline__write_unlock -#define __always_inline__spin_unlock_bh -#define __always_inline__read_unlock_bh -#define __always_inline__write_unlock_bh -#define __always_inline__spin_unlock_irq -#define __always_inline__read_unlock_irq -#define __always_inline__write_unlock_irq -#define __always_inline__spin_unlock_irqrestore -#define __always_inline__read_unlock_irqrestore -#define __always_inline__write_unlock_irqrestore - #endif /* __ASM_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 7a7e18f..8264a7f 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,137 +60,118 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); -/* - * We inline the unlock functions in the nondebug case: - */ -#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT) -#define __always_inline__spin_unlock -#define __always_inline__read_unlock -#define __always_inline__write_unlock -#define __always_inline__spin_unlock_irq -#define __always_inline__read_unlock_irq -#define __always_inline__write_unlock_irq -#endif - -#ifndef CONFIG_DEBUG_SPINLOCK -#ifndef CONFIG_GENERIC_LOCKBREAK - -#ifdef __always_inline__spin_lock +#ifdef CONFIG_INLINE_SPIN_LOCK #define _spin_lock(lock) __spin_lock(lock) #endif -#ifdef __always_inline__read_lock +#ifdef CONFIG_INLINE_READ_LOCK #define _read_lock(lock) __read_lock(lock) #endif -#ifdef __always_inline__write_lock +#ifdef CONFIG_INLINE_WRITE_LOCK #define _write_lock(lock) __write_lock(lock) #endif -#ifdef __always_inline__spin_lock_bh +#ifdef CONFIG_INLINE_SPIN_LOCK_BH #define _spin_lock_bh(lock) __spin_lock_bh(lock) #endif -#ifdef __always_inline__read_lock_bh +#ifdef CONFIG_INLINE_READ_LOCK_BH #define _read_lock_bh(lock) __read_lock_bh(lock) #endif -#ifdef __always_inline__write_lock_bh +#ifdef CONFIG_INLINE_WRITE_LOCK_BH #define _write_lock_bh(lock) __write_lock_bh(lock) #endif -#ifdef __always_inline__spin_lock_irq +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQ #define _spin_lock_irq(lock) __spin_lock_irq(lock) #endif -#ifdef __always_inline__read_lock_irq +#ifdef CONFIG_INLINE_READ_LOCK_IRQ #define _read_lock_irq(lock) __read_lock_irq(lock) #endif -#ifdef __always_inline__write_lock_irq +#ifdef CONFIG_INLINE_WRITE_LOCK_IRQ #define _write_lock_irq(lock) __write_lock_irq(lock) #endif -#ifdef __always_inline__spin_lock_irqsave +#ifdef CONFIG_INLINE_SPIN_LOCK_IRQSAVE #define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock) #endif -#ifdef __always_inline__read_lock_irqsave +#ifdef CONFIG_INLINE_READ_LOCK_IRQSAVE #define _read_lock_irqsave(lock) __read_lock_irqsave(lock) #endif -#ifdef __always_inline__write_lock_irqsave +#ifdef CONFIG_INLINE_WRITE_LOCK_IRQSAVE #define _write_lock_irqsave(lock) __write_lock_irqsave(lock) #endif -#endif /* !CONFIG_GENERIC_LOCKBREAK */ - -#ifdef __always_inline__spin_trylock +#ifdef CONFIG_INLINE_SPIN_TRYLOCK #define _spin_trylock(lock) __spin_trylock(lock) #endif -#ifdef __always_inline__read_trylock +#ifdef CONFIG_INLINE_READ_TRYLOCK #define _read_trylock(lock) __read_trylock(lock) #endif -#ifdef __always_inline__write_trylock +#ifdef CONFIG_INLINE_WRITE_TRYLOCK #define _write_trylock(lock) __write_trylock(lock) #endif -#ifdef __always_inline__spin_trylock_bh +#ifdef CONFIG_INLINE_SPIN_TRYLOCK_BH #define _spin_trylock_bh(lock) __spin_trylock_bh(lock) #endif -#ifdef __always_inline__spin_unlock +#ifdef CONFIG_INLINE_SPIN_UNLOCK #define _spin_unlock(lock) __spin_unlock(lock) #endif -#ifdef __always_inline__read_unlock +#ifdef CONFIG_INLINE_READ_UNLOCK #define _read_unlock(lock) __read_unlock(lock) #endif -#ifdef __always_inline__write_unlock +#ifdef CONFIG_INLINE_WRITE_UNLOCK #define _write_unlock(lock) __write_unlock(lock) #endif -#ifdef __always_inline__spin_unlock_bh +#ifdef CONFIG_INLINE_SPIN_UNLOCK_BH #define _spin_unlock_bh(lock) __spin_unlock_bh(lock) #endif -#ifdef __always_inline__read_unlock_bh +#ifdef CONFIG_INLINE_READ_UNLOCK_BH #define _read_unlock_bh(lock) __read_unlock_bh(lock) #endif -#ifdef __always_inline__write_unlock_bh +#ifdef CONFIG_INLINE_WRITE_UNLOCK_BH #define _write_unlock_bh(lock) __write_unlock_bh(lock) #endif -#ifdef __always_inline__spin_unlock_irq +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQ #define _spin_unlock_irq(lock) __spin_unlock_irq(lock) #endif -#ifdef __always_inline__read_unlock_irq +#ifdef CONFIG_INLINE_READ_UNLOCK_IRQ #define _read_unlock_irq(lock) __read_unlock_irq(lock) #endif -#ifdef __always_inline__write_unlock_irq +#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQ #define _write_unlock_irq(lock) __write_unlock_irq(lock) #endif -#ifdef __always_inline__spin_unlock_irqrestore +#ifdef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE #define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags) #endif -#ifdef __always_inline__read_unlock_irqrestore +#ifdef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE #define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags) #endif -#ifdef __always_inline__write_unlock_irqrestore +#ifdef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE #define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags) #endif -#endif /* CONFIG_DEBUG_SPINLOCK */ - static inline int __spin_trylock(spinlock_t *lock) { preempt_disable(); diff --git a/init/Kconfig b/init/Kconfig index 9e03ef8..06863dd 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1210,3 +1210,4 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool +source "kernel/Kconfig.locks" diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks new file mode 100644 index 0000000..d1f86db --- /dev/null +++ b/kernel/Kconfig.locks @@ -0,0 +1,199 @@ +# +# The ARCH_INLINE foo is necessary because select ignores "depends on" +# +config ARCH_INLINE_SPIN_TRYLOCK + bool + +config ARCH_INLINE_SPIN_TRYLOCK_BH + bool + +config ARCH_INLINE_SPIN_LOCK + bool + +config ARCH_INLINE_SPIN_LOCK_BH + bool + +config ARCH_INLINE_SPIN_LOCK_IRQ + bool + +config ARCH_INLINE_SPIN_LOCK_IRQSAVE + bool + +config ARCH_INLINE_SPIN_UNLOCK + bool + +config ARCH_INLINE_SPIN_UNLOCK_BH + bool + +config ARCH_INLINE_SPIN_UNLOCK_IRQ + bool + +config ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + bool + + +config ARCH_INLINE_READ_TRYLOCK + bool + +config ARCH_INLINE_READ_LOCK + bool + +config ARCH_INLINE_READ_LOCK_BH + bool + +config ARCH_INLINE_READ_LOCK_IRQ + bool + +config ARCH_INLINE_READ_LOCK_IRQSAVE + bool + +config ARCH_INLINE_READ_UNLOCK + bool + +config ARCH_INLINE_READ_UNLOCK_BH + bool + +config ARCH_INLINE_READ_UNLOCK_IRQ + bool + +config ARCH_INLINE_READ_UNLOCK_IRQRESTORE + bool + + +config ARCH_INLINE_WRITE_TRYLOCK + bool + +config ARCH_INLINE_WRITE_LOCK + bool + +config ARCH_INLINE_WRITE_LOCK_BH + bool + +config ARCH_INLINE_WRITE_LOCK_IRQ + bool + +config ARCH_INLINE_WRITE_LOCK_IRQSAVE + bool + +config ARCH_INLINE_WRITE_UNLOCK + bool + +config ARCH_INLINE_WRITE_UNLOCK_BH + bool + +config ARCH_INLINE_WRITE_UNLOCK_IRQ + bool + +config ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + bool + +# +# lock_* functions are inlined when: +# - DEBUG_SPINLOCK=n and GENERIC_LOCKBREAK=n and ARCH_INLINE_*LOCK=y +# +# trylock_* functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# +# unlock and unlock_irq functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# or +# - DEBUG_SPINLOCK=n and PREEMPT=n +# +# unlock_bh and unlock_irqrestore functions are inlined when: +# - DEBUG_SPINLOCK=n and ARCH_INLINE_*LOCK=y +# + +config INLINE_SPIN_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK + +config INLINE_SPIN_TRYLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_TRYLOCK_BH + +config INLINE_SPIN_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_SPIN_LOCK + +config INLINE_SPIN_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_BH + +config INLINE_SPIN_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_IRQ + +config INLINE_SPIN_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_SPIN_LOCK_IRQSAVE + +config INLINE_SPIN_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK) + +config INLINE_SPIN_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_BH + +config INLINE_SPIN_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH) + +config INLINE_SPIN_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE + + +config INLINE_READ_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_TRYLOCK + +config INLINE_READ_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_READ_LOCK + +config INLINE_READ_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_BH + +config INLINE_READ_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_IRQ + +config INLINE_READ_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_READ_LOCK_IRQSAVE + +config INLINE_READ_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK) + +config INLINE_READ_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_BH + +config INLINE_READ_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_READ_UNLOCK_BH) + +config INLINE_READ_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_READ_UNLOCK_IRQRESTORE + + +config INLINE_WRITE_TRYLOCK + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_TRYLOCK + +config INLINE_WRITE_LOCK + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && ARCH_INLINE_WRITE_LOCK + +config INLINE_WRITE_LOCK_BH + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_BH + +config INLINE_WRITE_LOCK_IRQ + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_IRQ + +config INLINE_WRITE_LOCK_IRQSAVE + def_bool !DEBUG_SPINLOCK && !GENERIC_LOCKBREAK && \ + ARCH_INLINE_WRITE_LOCK_IRQSAVE + +config INLINE_WRITE_UNLOCK + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK) + +config INLINE_WRITE_UNLOCK_BH + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_BH + +config INLINE_WRITE_UNLOCK_IRQ + def_bool !DEBUG_SPINLOCK && (!PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH) + +config INLINE_WRITE_UNLOCK_IRQRESTORE + def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 5ddab73..235a957 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,7 +21,7 @@ #include #include -#ifndef _spin_trylock +#ifndef CONFIG_INLINE_SPIN_TRYLOCK int __lockfunc _spin_trylock(spinlock_t *lock) { return __spin_trylock(lock); @@ -29,7 +29,7 @@ int __lockfunc _spin_trylock(spinlock_t *lock) EXPORT_SYMBOL(_spin_trylock); #endif -#ifndef _read_trylock +#ifndef CONFIG_INLINE_READ_TRYLOCK int __lockfunc _read_trylock(rwlock_t *lock) { return __read_trylock(lock); @@ -37,7 +37,7 @@ int __lockfunc _read_trylock(rwlock_t *lock) EXPORT_SYMBOL(_read_trylock); #endif -#ifndef _write_trylock +#ifndef CONFIG_INLINE_WRITE_TRYLOCK int __lockfunc _write_trylock(rwlock_t *lock) { return __write_trylock(lock); @@ -52,7 +52,7 @@ EXPORT_SYMBOL(_write_trylock); */ #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) -#ifndef _read_lock +#ifndef CONFIG_INLINE_READ_LOCK void __lockfunc _read_lock(rwlock_t *lock) { __read_lock(lock); @@ -60,7 +60,7 @@ void __lockfunc _read_lock(rwlock_t *lock) EXPORT_SYMBOL(_read_lock); #endif -#ifndef _spin_lock_irqsave +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQSAVE unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) { return __spin_lock_irqsave(lock); @@ -68,7 +68,7 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) EXPORT_SYMBOL(_spin_lock_irqsave); #endif -#ifndef _spin_lock_irq +#ifndef CONFIG_INLINE_SPIN_LOCK_IRQ void __lockfunc _spin_lock_irq(spinlock_t *lock) { __spin_lock_irq(lock); @@ -76,7 +76,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock) EXPORT_SYMBOL(_spin_lock_irq); #endif -#ifndef _spin_lock_bh +#ifndef CONFIG_INLINE_SPIN_LOCK_BH void __lockfunc _spin_lock_bh(spinlock_t *lock) { __spin_lock_bh(lock); @@ -84,7 +84,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock) EXPORT_SYMBOL(_spin_lock_bh); #endif -#ifndef _read_lock_irqsave +#ifndef CONFIG_INLINE_READ_LOCK_IRQSAVE unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) { return __read_lock_irqsave(lock); @@ -92,7 +92,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) EXPORT_SYMBOL(_read_lock_irqsave); #endif -#ifndef _read_lock_irq +#ifndef CONFIG_INLINE_READ_LOCK_IRQ void __lockfunc _read_lock_irq(rwlock_t *lock) { __read_lock_irq(lock); @@ -100,7 +100,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock) EXPORT_SYMBOL(_read_lock_irq); #endif -#ifndef _read_lock_bh +#ifndef CONFIG_INLINE_READ_LOCK_BH void __lockfunc _read_lock_bh(rwlock_t *lock) { __read_lock_bh(lock); @@ -108,7 +108,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock) EXPORT_SYMBOL(_read_lock_bh); #endif -#ifndef _write_lock_irqsave +#ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) { return __write_lock_irqsave(lock); @@ -116,7 +116,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) EXPORT_SYMBOL(_write_lock_irqsave); #endif -#ifndef _write_lock_irq +#ifndef CONFIG_INLINE_WRITE_LOCK_IRQ void __lockfunc _write_lock_irq(rwlock_t *lock) { __write_lock_irq(lock); @@ -124,7 +124,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock) EXPORT_SYMBOL(_write_lock_irq); #endif -#ifndef _write_lock_bh +#ifndef CONFIG_INLINE_WRITE_LOCK_BH void __lockfunc _write_lock_bh(rwlock_t *lock) { __write_lock_bh(lock); @@ -132,7 +132,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock) EXPORT_SYMBOL(_write_lock_bh); #endif -#ifndef _spin_lock +#ifndef CONFIG_INLINE_SPIN_LOCK void __lockfunc _spin_lock(spinlock_t *lock) { __spin_lock(lock); @@ -140,7 +140,7 @@ void __lockfunc _spin_lock(spinlock_t *lock) EXPORT_SYMBOL(_spin_lock); #endif -#ifndef _write_lock +#ifndef CONFIG_INLINE_WRITE_LOCK void __lockfunc _write_lock(rwlock_t *lock) { __write_lock(lock); @@ -272,7 +272,7 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); #endif -#ifndef _spin_unlock +#ifndef CONFIG_INLINE_SPIN_UNLOCK void __lockfunc _spin_unlock(spinlock_t *lock) { __spin_unlock(lock); @@ -280,7 +280,7 @@ void __lockfunc _spin_unlock(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock); #endif -#ifndef _write_unlock +#ifndef CONFIG_INLINE_WRITE_UNLOCK void __lockfunc _write_unlock(rwlock_t *lock) { __write_unlock(lock); @@ -288,7 +288,7 @@ void __lockfunc _write_unlock(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock); #endif -#ifndef _read_unlock +#ifndef CONFIG_INLINE_READ_UNLOCK void __lockfunc _read_unlock(rwlock_t *lock) { __read_unlock(lock); @@ -296,7 +296,7 @@ void __lockfunc _read_unlock(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock); #endif -#ifndef _spin_unlock_irqrestore +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { __spin_unlock_irqrestore(lock, flags); @@ -304,7 +304,7 @@ void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_spin_unlock_irqrestore); #endif -#ifndef _spin_unlock_irq +#ifndef CONFIG_INLINE_SPIN_UNLOCK_IRQ void __lockfunc _spin_unlock_irq(spinlock_t *lock) { __spin_unlock_irq(lock); @@ -312,7 +312,7 @@ void __lockfunc _spin_unlock_irq(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock_irq); #endif -#ifndef _spin_unlock_bh +#ifndef CONFIG_INLINE_SPIN_UNLOCK_BH void __lockfunc _spin_unlock_bh(spinlock_t *lock) { __spin_unlock_bh(lock); @@ -320,7 +320,7 @@ void __lockfunc _spin_unlock_bh(spinlock_t *lock) EXPORT_SYMBOL(_spin_unlock_bh); #endif -#ifndef _read_unlock_irqrestore +#ifndef CONFIG_INLINE_READ_UNLOCK_IRQRESTORE void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __read_unlock_irqrestore(lock, flags); @@ -328,7 +328,7 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_read_unlock_irqrestore); #endif -#ifndef _read_unlock_irq +#ifndef CONFIG_INLINE_READ_UNLOCK_IRQ void __lockfunc _read_unlock_irq(rwlock_t *lock) { __read_unlock_irq(lock); @@ -336,7 +336,7 @@ void __lockfunc _read_unlock_irq(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock_irq); #endif -#ifndef _read_unlock_bh +#ifndef CONFIG_INLINE_READ_UNLOCK_BH void __lockfunc _read_unlock_bh(rwlock_t *lock) { __read_unlock_bh(lock); @@ -344,7 +344,7 @@ void __lockfunc _read_unlock_bh(rwlock_t *lock) EXPORT_SYMBOL(_read_unlock_bh); #endif -#ifndef _write_unlock_irqrestore +#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __write_unlock_irqrestore(lock, flags); @@ -352,7 +352,7 @@ void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) EXPORT_SYMBOL(_write_unlock_irqrestore); #endif -#ifndef _write_unlock_irq +#ifndef CONFIG_INLINE_WRITE_UNLOCK_IRQ void __lockfunc _write_unlock_irq(rwlock_t *lock) { __write_unlock_irq(lock); @@ -360,7 +360,7 @@ void __lockfunc _write_unlock_irq(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock_irq); #endif -#ifndef _write_unlock_bh +#ifndef CONFIG_INLINE_WRITE_UNLOCK_BH void __lockfunc _write_unlock_bh(rwlock_t *lock) { __write_unlock_bh(lock); @@ -368,7 +368,7 @@ void __lockfunc _write_unlock_bh(rwlock_t *lock) EXPORT_SYMBOL(_write_unlock_bh); #endif -#ifndef _spin_trylock_bh +#ifndef CONFIG_INLINE_SPIN_TRYLOCK_BH int __lockfunc _spin_trylock_bh(spinlock_t *lock) { return __spin_trylock_bh(lock); -- cgit v0.10.2 From 8e13c7b772387f55dc05c6a0e5b30010c3c46ff9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 9 Nov 2009 15:21:41 +0000 Subject: locking: Reduce ifdefs in kernel/spinlock.c With the Kconfig based inline decisions we can remove extra ifdefs in kernel/spinlock.c by creating the complex lockbreak functions as inlines which are inserted into the non inlined lock functions. No functional change. Signed-off-by: Thomas Gleixner LKML-Reference: <20091109151428.548614772@linutronix.de> Acked-by: Heiko Carstens Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 235a957..41e0422 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,6 +21,133 @@ #include #include +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) +/* + * The __lock_function inlines are taken from + * include/linux/spinlock_api_smp.h + */ +#else +/* + * We build the __lock_function inlines here. They are too large for + * inlining all over the place, but here is only one user per function + * which embedds them into the calling _lock_function below. + * + * This could be a long-held lock. We both prepare to spin for a long + * time (making _this_ CPU preemptable if possible), and we also signal + * towards that other CPU that it should break the lock ASAP. + */ +#define BUILD_LOCK_OPS(op, locktype) \ +void __lockfunc __##op##_lock(locktype##_t *lock) \ +{ \ + for (;;) { \ + preempt_disable(); \ + if (likely(_raw_##op##_trylock(lock))) \ + break; \ + preempt_enable(); \ + \ + if (!(lock)->break_lock) \ + (lock)->break_lock = 1; \ + while (!op##_can_lock(lock) && (lock)->break_lock) \ + _raw_##op##_relax(&lock->raw_lock); \ + } \ + (lock)->break_lock = 0; \ +} \ + \ +unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \ +{ \ + unsigned long flags; \ + \ + for (;;) { \ + preempt_disable(); \ + local_irq_save(flags); \ + if (likely(_raw_##op##_trylock(lock))) \ + break; \ + local_irq_restore(flags); \ + preempt_enable(); \ + \ + if (!(lock)->break_lock) \ + (lock)->break_lock = 1; \ + while (!op##_can_lock(lock) && (lock)->break_lock) \ + _raw_##op##_relax(&lock->raw_lock); \ + } \ + (lock)->break_lock = 0; \ + return flags; \ +} \ + \ +void __lockfunc __##op##_lock_irq(locktype##_t *lock) \ +{ \ + _##op##_lock_irqsave(lock); \ +} \ + \ +void __lockfunc __##op##_lock_bh(locktype##_t *lock) \ +{ \ + unsigned long flags; \ + \ + /* */ \ + /* Careful: we must exclude softirqs too, hence the */ \ + /* irq-disabling. We use the generic preemption-aware */ \ + /* function: */ \ + /**/ \ + flags = _##op##_lock_irqsave(lock); \ + local_bh_disable(); \ + local_irq_restore(flags); \ +} \ + +/* + * Build preemption-friendly versions of the following + * lock-spinning functions: + * + * __[spin|read|write]_lock() + * __[spin|read|write]_lock_irq() + * __[spin|read|write]_lock_irqsave() + * __[spin|read|write]_lock_bh() + */ +BUILD_LOCK_OPS(spin, spinlock); +BUILD_LOCK_OPS(read, rwlock); +BUILD_LOCK_OPS(write, rwlock); + +#endif + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) +{ + preempt_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} +EXPORT_SYMBOL(_spin_lock_nested); + +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, + int subclass) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_spin_trylock, _raw_spin_lock, + _raw_spin_lock_flags, &flags); + return flags; +} +EXPORT_SYMBOL(_spin_lock_irqsave_nested); + +void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, + struct lockdep_map *nest_lock) +{ + preempt_disable(); + spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} +EXPORT_SYMBOL(_spin_lock_nest_lock); + +#endif + #ifndef CONFIG_INLINE_SPIN_TRYLOCK int __lockfunc _spin_trylock(spinlock_t *lock) { @@ -45,13 +172,6 @@ int __lockfunc _write_trylock(rwlock_t *lock) EXPORT_SYMBOL(_write_trylock); #endif -/* - * If lockdep is enabled then we use the non-preemption spin-ops - * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are - * not re-enabled during lock-acquire (which the preempt-spin-ops do): - */ -#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) - #ifndef CONFIG_INLINE_READ_LOCK void __lockfunc _read_lock(rwlock_t *lock) { @@ -148,130 +268,6 @@ void __lockfunc _write_lock(rwlock_t *lock) EXPORT_SYMBOL(_write_lock); #endif -#else /* CONFIG_PREEMPT: */ - -/* - * This could be a long-held lock. We both prepare to spin for a long - * time (making _this_ CPU preemptable if possible), and we also signal - * towards that other CPU that it should break the lock ASAP. - * - * (We do this in a function because inlining it would be excessive.) - */ - -#define BUILD_LOCK_OPS(op, locktype) \ -void __lockfunc _##op##_lock(locktype##_t *lock) \ -{ \ - for (;;) { \ - preempt_disable(); \ - if (likely(_raw_##op##_trylock(lock))) \ - break; \ - preempt_enable(); \ - \ - if (!(lock)->break_lock) \ - (lock)->break_lock = 1; \ - while (!op##_can_lock(lock) && (lock)->break_lock) \ - _raw_##op##_relax(&lock->raw_lock); \ - } \ - (lock)->break_lock = 0; \ -} \ - \ -EXPORT_SYMBOL(_##op##_lock); \ - \ -unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \ -{ \ - unsigned long flags; \ - \ - for (;;) { \ - preempt_disable(); \ - local_irq_save(flags); \ - if (likely(_raw_##op##_trylock(lock))) \ - break; \ - local_irq_restore(flags); \ - preempt_enable(); \ - \ - if (!(lock)->break_lock) \ - (lock)->break_lock = 1; \ - while (!op##_can_lock(lock) && (lock)->break_lock) \ - _raw_##op##_relax(&lock->raw_lock); \ - } \ - (lock)->break_lock = 0; \ - return flags; \ -} \ - \ -EXPORT_SYMBOL(_##op##_lock_irqsave); \ - \ -void __lockfunc _##op##_lock_irq(locktype##_t *lock) \ -{ \ - _##op##_lock_irqsave(lock); \ -} \ - \ -EXPORT_SYMBOL(_##op##_lock_irq); \ - \ -void __lockfunc _##op##_lock_bh(locktype##_t *lock) \ -{ \ - unsigned long flags; \ - \ - /* */ \ - /* Careful: we must exclude softirqs too, hence the */ \ - /* irq-disabling. We use the generic preemption-aware */ \ - /* function: */ \ - /**/ \ - flags = _##op##_lock_irqsave(lock); \ - local_bh_disable(); \ - local_irq_restore(flags); \ -} \ - \ -EXPORT_SYMBOL(_##op##_lock_bh) - -/* - * Build preemption-friendly versions of the following - * lock-spinning functions: - * - * _[spin|read|write]_lock() - * _[spin|read|write]_lock_irq() - * _[spin|read|write]_lock_irqsave() - * _[spin|read|write]_lock_bh() - */ -BUILD_LOCK_OPS(spin, spinlock); -BUILD_LOCK_OPS(read, rwlock); -BUILD_LOCK_OPS(write, rwlock); - -#endif /* CONFIG_PREEMPT */ - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - -void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) -{ - preempt_disable(); - spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); -} -EXPORT_SYMBOL(_spin_lock_nested); - -unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) -{ - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_spin_trylock, _raw_spin_lock, - _raw_spin_lock_flags, &flags); - return flags; -} -EXPORT_SYMBOL(_spin_lock_irqsave_nested); - -void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, - struct lockdep_map *nest_lock) -{ - preempt_disable(); - spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); -} -EXPORT_SYMBOL(_spin_lock_nest_lock); - -#endif - #ifndef CONFIG_INLINE_SPIN_UNLOCK void __lockfunc _spin_unlock(spinlock_t *lock) { -- cgit v0.10.2 From 687b16fb617bd446439425a368ad7c7bbd202c73 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 13 Nov 2009 13:16:15 +0100 Subject: hw-breakpoints: Provide an off-case for counter_arch_bp() If an arch doesn't support the hw breakpoints, counter_arch_bp() has no off case to cover the missing breakpoint info structure from the perf event. The result is a build error in non-x86 configs. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Prasad diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 7eba9b9..18710e0 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,11 +16,6 @@ enum { HW_BREAKPOINT_X = 4, }; -static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) -{ - return &bp->hw.info; -} - static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -83,6 +78,11 @@ extern void release_bp_slot(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return &bp->hw.info; +} + #else /* !CONFIG_HAVE_HW_BREAKPOINT */ static inline struct perf_event * @@ -126,6 +126,11 @@ static inline void release_bp_slot(struct perf_event *bp) { } static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } +static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) +{ + return NULL; +} + #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v0.10.2 From 0388423dba2217b4e5b6c61690b0506d13b25a49 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 13 Nov 2009 15:30:00 -0500 Subject: x86: Minimise printk spew from per-vendor init code In the default case where the kernel supports all CPU vendors, we currently print out a bunch of not useful messages on every system. 32-bit: KERNEL supported cpus: Intel GenuineIntel AMD AuthenticAMD NSC Geode by NSC Cyrix CyrixInstead Centaur CentaurHauls Transmeta GenuineTMx86 Transmeta TransmetaCPU UMC UMC UMC UMC 64-bit: KERNEL supported cpus: Intel GenuineIntel AMD AuthenticAMD Centaur CentaurHauls Given that "what CPUs does the kernel support" isn't useful for the "support everything" case, we can suppress these printk's. Signed-off-by: Dave Jones LKML-Reference: <20091113203000.GA19160@redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b..617a29f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -656,6 +656,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) void __init early_cpu_init(void) { +#ifdef PROCESSOR_SELECT const struct cpu_dev *const *cdev; int count = 0; @@ -676,7 +677,7 @@ void __init early_cpu_init(void) cpudev->c_ident[j]); } } - +#endif early_identify_cpu(&boot_cpu_data); } -- cgit v0.10.2 From 688bcaff291cf2fe2734e43f2793d4d05b850518 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 01:12:47 +0100 Subject: hw-breakpoints: Fix build on !perf architectures the arch/alpha build fails with: In file included from tip/kernel/exit.c:52: tip/include/linux/hw_breakpoint.h: In function 'hw_breakpoint_addr': tip/include/linux/hw_breakpoint.h:21: error: 'struct perf_event' has no member named 'attr' [...] Move these helper inlines inside the CONFIG_HAVE_HW_BREAKPOINT ifdef. Cc: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 18710e0..0b98cbf 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,6 +16,8 @@ enum { HW_BREAKPOINT_X = 4, }; +#ifdef CONFIG_HAVE_HW_BREAKPOINT + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -31,7 +33,6 @@ static inline int hw_breakpoint_len(struct perf_event *bp) return bp->attr.bp_len; } -#ifdef CONFIG_HAVE_HW_BREAKPOINT extern struct perf_event * register_user_hw_breakpoint(unsigned long addr, int len, -- cgit v0.10.2 From b01c845f0f2e3f9e54e6a78d5d56895f5b95e818 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 13 Nov 2009 14:38:26 -0800 Subject: x86: Remove CPU cache size output for non-Intel too As Dave Jones said about the output in intel_cacheinfo.c: "They aren't useful, and pollute the dmesg output a lot (especially on machines with many cores). Also the same information can be trivially found out from userspace." Give the generic display_cacheinfo() function the same treatment. Signed-off-by: Roland Dreier Acked-by: Dave Jones Cc: Mike Travis Cc: Andi Kleen Cc: Heiko Carstens Cc: Randy Dunlap Cc: Tejun Heo Cc: Greg Kroah-Hartman Cc: Yinghai Lu Cc: David Rientjes Cc: Steven Rostedt Cc: Rusty Russell Cc: Hidetoshi Seto Cc: Jack Steiner Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 617a29f..9db1e24 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) if (n >= 0x80000005) { cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); - printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", - edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); c->x86_cache_size = (ecx>>24) + (edx>>24); #ifdef CONFIG_X86_64 /* On K8 L1 TLB is inclusive, so don't count it */ @@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) #endif c->x86_cache_size = l2size; - - printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", - l2size, ecx & 0xFF); } void __cpuinit detect_ht(struct cpuinfo_x86 *c) -- cgit v0.10.2 From 811cb50baf63461ce0bdb234927046131fc7fa8b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Nov 2009 23:40:09 +0100 Subject: tracing: Fix event format export For some reason the export of the event print format to userspace uses '#fmt' which breaks if the format string is anything but a plain string, for example if it is built with macros then the macro names are exported instead of their contents. Use "\"%s\"", fmt instead of "%s", #fmt to export the string and not the way it is built. For example, in net/mac80211/driver-trace.h for the trace event drv_start there is: TP_printk( LOCAL_PR_FMT, LOCAL_PR_ARG ) Which use to produce: print fmt: LOCAL_PR_FMT, REC->wiphy_name Now produces: print fmt: "%s", REC->wiphy_name Signed-off-by: Johannes Berg LKML-Reference: <20091113224009.GB23942@elte.hu> Signed-off-by: Steven Rostedt diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc0d966..dacb8ef 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -159,7 +159,7 @@ #undef __get_str #undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args) +#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args) #undef TP_fast_assign #define TP_fast_assign(args...) args -- cgit v0.10.2 From 4c49b12853fbb5eff4849b7b6a1e895776f027a1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 13 Nov 2009 21:47:33 -0800 Subject: perf_event: Fix invalid type in ioctl definition u64 is invalid in userspace headers, including ioctl definitions; use __u64 instead Signed-off-by: Arjan van de Ven Cc: LKML-Reference: <20091113214733.7cd76be9@infradead.org> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 45b56fa..ec3768a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -219,7 +219,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_DISABLE _IO ('$', 1) #define PERF_EVENT_IOC_REFRESH _IO ('$', 2) #define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) #define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -- cgit v0.10.2 From 560d4bc0df9a5e63b980432282d8c2bd3559ec74 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Nov 2009 19:51:38 -0800 Subject: rcu: Further cleanups of use of lastcomp Now that a copy of the rsp->completed flag is available in all rcu_node structures, make full use of it. It is still legitimate to access rsp->completed while holding the root rcu_node structure's lock, however. Also, tighten up force_quiescent_state()'s checks for end of current grace period. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1258170699933-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 3df0438..24bbf2c 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -817,7 +817,7 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) rnp = rdp->mynode; spin_lock_irqsave(&rnp->lock, flags); - if (lastcomp != ACCESS_ONCE(rsp->completed)) { + if (lastcomp != rnp->completed) { /* * Someone beat us to it for this grace period, so leave. @@ -935,7 +935,6 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) { unsigned long flags; - long lastcomp; unsigned long mask; struct rcu_data *rdp = rsp->rda[cpu]; struct rcu_node *rnp; @@ -971,7 +970,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_unlock(&rnp->lock); /* irqs remain disabled. */ rnp = rnp->parent; } while (rnp != NULL); - lastcomp = rsp->completed; spin_unlock_irqrestore(&rsp->onofflock, flags); @@ -1145,7 +1143,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, rcu_for_each_leaf_node(rsp, rnp) { mask = 0; spin_lock_irqsave(&rnp->lock, flags); - if (rsp->completed != lastcomp) { + if (rnp->completed != lastcomp) { spin_unlock_irqrestore(&rnp->lock, flags); return 1; } @@ -1159,7 +1157,7 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) mask |= bit; } - if (mask != 0 && rsp->completed == lastcomp) { + if (mask != 0 && rnp->completed == lastcomp) { /* cpu_quiet_msk() releases rnp->lock. */ cpu_quiet_msk(mask, rsp, rnp, flags); @@ -1196,7 +1194,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) lastcomp = rsp->gpnum - 1; signaled = rsp->signaled; rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; - if (lastcomp == rsp->gpnum) { + if(!rcu_gp_in_progress(rsp)) { rsp->n_force_qs_ngp++; spin_unlock(&rnp->lock); goto unlock_ret; /* no GP in progress, time updated. */ @@ -1224,7 +1222,8 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) /* Update state, record completion counter. */ forcenow = 0; spin_lock(&rnp->lock); - if (lastcomp == rsp->completed && + if (lastcomp + 1 == rsp->gpnum && + lastcomp == rsp->completed && rsp->signaled == signaled) { rsp->signaled = RCU_FORCE_QS; rsp->completed_fqs = lastcomp; -- cgit v0.10.2 From 2f51f9884f6a36b0fe9636d5a1937e5cbd25723b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Nov 2009 19:51:39 -0800 Subject: rcu: Eliminate __rcu_pending() false positives Now that there are both ->gpnum and ->completed fields in the rcu_node structure, __rcu_pending() should check rdp->gpnum and rdp->completed against rnp->gpnum and rdp->completed, respectively, instead of the prior comparison against the rcu_state fields rsp->gpnum and rsp->completed. Given the old comparison, __rcu_pending() could return 1, resulting in a needless raise_softirq(RCU_SOFTIRQ). This useless work would happen if RCU responded to a scheduling-clock interrupt after the rcu_state fields had been updated, but before the rcu_node fields had been updated. Changing the comparison from the rcu_state fields to the rcu_node fields prevents this useless work from happening. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12581706991966-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 24bbf2c..9b36d6d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1403,6 +1403,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); */ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) { + struct rcu_node *rnp = rdp->mynode; + rdp->n_rcu_pending++; /* Check for CPU stalls, if enabled. */ @@ -1427,13 +1429,13 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) } /* Has another RCU grace period completed? */ - if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */ + if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */ rdp->n_rp_gp_completed++; return 1; } /* Has a new RCU grace period started? */ - if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */ + if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */ rdp->n_rp_gp_started++; return 1; } -- cgit v0.10.2 From 31c997cac76e62918858a432fff6e43fd48425f9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 10:34:41 +0100 Subject: x86: Fix cpu_devs[] initialization in early_cpu_init() Yinghai Lu noticed that this commit: 0388423: x86: Minimise printk spew from per-vendor init code mistakenly left out the initialization of cpu_devs[] in the !PROCESSOR_SELECT case. Fix it. Reported-by: Yinghai Lu Cc: Dave Jones LKML-Reference: <20091113203000.GA19160@redhat.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9db1e24..61242a5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -651,28 +651,34 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) void __init early_cpu_init(void) { -#ifdef PROCESSOR_SELECT const struct cpu_dev *const *cdev; int count = 0; +#ifdef PROCESSOR_SELECT printk(KERN_INFO "KERNEL supported cpus:\n"); +#endif + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { const struct cpu_dev *cpudev = *cdev; - unsigned int j; if (count >= X86_VENDOR_NUM) break; cpu_devs[count] = cpudev; count++; - for (j = 0; j < 2; j++) { - if (!cpudev->c_ident[j]) - continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, - cpudev->c_ident[j]); +#ifdef PROCESSOR_SELECT + { + unsigned int j; + + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; + printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } } - } #endif + } early_identify_cpu(&boot_cpu_data); } -- cgit v0.10.2 From 68efa37df779b3e04280598e8b5b3a1919b65fee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 01:35:29 +0100 Subject: hw-breakpoints, x86: Fix modular KVM build This build error: arch/x86/kvm/x86.c:3655: error: implicit declaration of function 'hw_breakpoint_restore' Happens because in the CONFIG_KVM=m case there's no 'CONFIG_KVM' define in the kernel - it's CONFIG_KVM_MODULE in that case. Make the prototype available unconditionally. Cc: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 0f6e92a..fdabd84 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -96,9 +96,7 @@ static inline int hw_breakpoint_active(void) extern void aout_dump_debugregs(struct user *dump); -#ifdef CONFIG_KVM extern void hw_breakpoint_restore(void); -#endif #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 57dcee5f..752daeb 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -43,6 +43,7 @@ /* Per cpu debug control register value */ DEFINE_PER_CPU(unsigned long, dr7); +EXPORT_PER_CPU_SYMBOL(dr7); /* Per cpu debug address registers values */ static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); @@ -409,6 +410,7 @@ void aout_dump_debugregs(struct user *dump) dump->u_debugreg[7] = dr7; } +EXPORT_SYMBOL_GPL(aout_dump_debugregs); /* * Release the user breakpoints used by ptrace @@ -424,7 +426,6 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) } } -#ifdef CONFIG_KVM void hw_breakpoint_restore(void) { set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); @@ -435,7 +436,6 @@ void hw_breakpoint_restore(void) set_debugreg(__get_cpu_var(dr7), 7); } EXPORT_SYMBOL_GPL(hw_breakpoint_restore); -#endif /* * Handle debug exception notifications. -- cgit v0.10.2 From a3b28ee1090072092e2be043c24df94230e725b2 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:36 +0900 Subject: x86: Set dma_ops to nommu_dma_ops by default We set dma_ops to nommu_dma_ops at two different places for x86_32 and x86_64. This unifies them by setting dma_ops to nommu_dma_ops by default. Signed-off-by: FUJITA Tomonori LKML-Reference: <1258199198-16657-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0b11bf1..f170b53 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -15,7 +15,7 @@ static int forbid_dac __read_mostly; -struct dma_map_ops *dma_ops; +struct dma_map_ops *dma_ops = &nommu_dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -128,8 +128,6 @@ void __init pci_iommu_alloc(void) #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); -#else - dma_ops = &nommu_dma_ops; #endif if (pci_swiotlb_init()) return; diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index a6e5d0f..e36e71d 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -61,8 +61,7 @@ int __init pci_swiotlb_init(void) if (swiotlb) { swiotlb_init(0); dma_ops = &swiotlb_dma_ops; - } else - dma_ops = &nommu_dma_ops; + } return swiotlb_force; } -- cgit v0.10.2 From 94a15564ac63af6bb2ff8d4d04f86d5e7ee0278a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:37 +0900 Subject: x86: Move iommu_shutdown_noop to x86_init.c iommu_init_noop() is in arch/x86/kernel/x86_init.c but iommu_shutdown_noop() in arch/x86/include/asm/iommu.h. This moves iommu_shutdown_noop() to x86_init.c for consistency. Signed-off-by: FUJITA Tomonori LKML-Reference: <1258199198-16657-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index df42a71..345c99c 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -static inline void iommu_shutdown_noop(void) {} extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index c46984d..80f3ae2 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -20,6 +20,7 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } +void __init iommu_shutdown_noop(void) { } /* * The platform setup functions are preset with the default functions -- cgit v0.10.2 From 6959450e567c1f17d3ce8489099fc56c3721d577 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 20:46:38 +0900 Subject: swiotlb: Remove duplicate swiotlb_force extern declarations Signed-off-by: FUJITA Tomonori Cc: tony.luck@intel.com LKML-Reference: <1258199198-16657-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h index dcbaea7..f0acde6 100644 --- a/arch/ia64/include/asm/swiotlb.h +++ b/arch/ia64/include/asm/swiotlb.h @@ -4,8 +4,6 @@ #include #include -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern void pci_swiotlb_init(void); diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 940f13a..87ffcb1 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -3,10 +3,6 @@ #include -/* SWIOTLB interface */ - -extern int swiotlb_force; - #ifdef CONFIG_SWIOTLB extern int swiotlb; extern int pci_swiotlb_init(void); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index eb9bdb4..febedcf 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,6 +7,8 @@ struct device; struct dma_attrs; struct scatterlist; +extern int swiotlb_force; + /* * Maximum allowable number of contiguous slabs to map, * must be a power of 2. What is the appropriate value ? -- cgit v0.10.2 From f4131c6259b46bd84dcfcd3bb9ed08e99e2875a4 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 14 Nov 2009 21:26:50 +0900 Subject: x86: Make calgary_iommu_init() static This makes calgary_iommu_init() static and moves it to remove the forward declaration. Signed-off-by: FUJITA Tomonori Cc: muli@il.ibm.com LKML-Reference: <20091114212603U.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 833f491..c84ad03 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1345,7 +1345,24 @@ static void __init get_tce_space_from_tar(void) return; } -int __init calgary_iommu_init(void); +static int __init calgary_iommu_init(void) +{ + int ret; + + /* ok, we're trying to use Calgary - let's roll */ + printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); + + ret = calgary_init(); + if (ret) { + printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " + "falling back to no_iommu\n", ret); + return ret; + } + + bad_dma_address = 0x0; + + return 0; +} void __init detect_calgary(void) { @@ -1458,25 +1475,6 @@ cleanup: } } -int __init calgary_iommu_init(void) -{ - int ret; - - /* ok, we're trying to use Calgary - let's roll */ - printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); - - ret = calgary_init(); - if (ret) { - printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " - "falling back to no_iommu\n", ret); - return ret; - } - - bad_dma_address = 0x0; - - return 0; -} - static int __init calgary_parse_options(char *p) { unsigned int bridge; -- cgit v0.10.2 From 14722485830fe6baba738b91d96f06fbd6cf7a18 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 13 Nov 2009 11:56:24 +0000 Subject: x86-64: __copy_from_user_inatomic() adjustments This v2.6.26 commit: ad2fc2c: x86: fix copy_user on x86 rendered __copy_from_user_inatomic() identical to copy_user_generic(), yet didn't make the former just call the latter from an inline function. Furthermore, this v2.6.19 commit: b885808: [PATCH] Add proper sparse __user casts to __copy_to_user_inatomic converted the return type of __copy_to_user_inatomic() from unsigned long to int, but didn't do the same to __copy_from_user_inatomic(). Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Alexander Viro Cc: Arjan van de Ven Cc: Andi Kleen Cc: LKML-Reference: <4AFD5778020000780001F8F4@vpn.id2.novell.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index ce6fec7..7adebac 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -193,8 +193,11 @@ __must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); -__must_check long __copy_from_user_inatomic(void *dst, const void __user *src, - unsigned size); +static __must_check __always_inline int +__copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) +{ + return copy_user_generic(dst, (__force const void *)src, size); +} static __must_check __always_inline int __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index a0cdd8c..cd54276 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -32,7 +32,6 @@ EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); EXPORT_SYMBOL(copy_to_user); -EXPORT_SYMBOL(__copy_from_user_inatomic); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 4be3c41..3936998 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -96,12 +96,6 @@ ENTRY(copy_user_generic) CFI_ENDPROC ENDPROC(copy_user_generic) -ENTRY(__copy_from_user_inatomic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(__copy_from_user_inatomic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) -- cgit v0.10.2 From 498657a478c60be092208422fefa9c7b248729c2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Nov 2009 18:33:53 +0900 Subject: sched, kvm: Fix race condition involving sched_in_preempt_notifers In finish_task_switch(), fire_sched_in_preempt_notifiers() is called after finish_lock_switch(). However, depending on architecture, preemption can be enabled after finish_lock_switch() which breaks the semantics of preempt notifiers. So move it before finish_arch_switch(). This also makes the in- notifiers symmetric to out- notifiers in terms of locking - now both are called under rq lock. Signed-off-by: Tejun Heo Acked-by: Avi Kivity Cc: Peter Zijlstra LKML-Reference: <4AFD2801.7020900@kernel.org> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 701eca4..cea2bea 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2758,9 +2758,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) prev_state = prev->state; finish_arch_switch(prev); perf_event_task_sched_in(current, cpu_of(rq)); + fire_sched_in_preempt_notifiers(current); finish_lock_switch(rq, prev); - fire_sched_in_preempt_notifiers(current); if (mm) mmdrop(mm); if (unlikely(prev_state == TASK_DEAD)) { -- cgit v0.10.2 From d2fb8b4151a92223da6a84006f8f248ebeb6677d Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Sun, 15 Nov 2009 20:36:53 +0900 Subject: perf tools: Add new perf_atoll() function to parse string representing size in bytes This patch modifies util/string.[ch] to add new function: perf_atoll() to parse string representing size in bytes. This function parses (\d+)(b|B|kb|KB|mb|MB|gb|GB) (e.g. "256MB") and returns its numeric value. (e.g. 268435456) Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1258285013-4759-1-git-send-email-mitake@dcl.info.waseda.ac.jp> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 04743d3..2270435 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,5 +1,7 @@ #include +#include #include "string.h" +#include "util.h" static int hex(char ch) { @@ -43,3 +45,85 @@ char *strxfrchar(char *s, char from, char to) return s; } + +#define K 1024LL +/* + * perf_atoll() + * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB") + * and return its numeric value + */ +s64 perf_atoll(const char *str) +{ + unsigned int i; + s64 length = -1, unit = 1; + + if (!isdigit(str[0])) + goto out_err; + + for (i = 1; i < strlen(str); i++) { + switch (str[i]) { + case 'B': + case 'b': + break; + case 'K': + if (str[i + 1] != 'B') + goto out_err; + else + goto kilo; + case 'k': + if (str[i + 1] != 'b') + goto out_err; +kilo: + unit = K; + break; + case 'M': + if (str[i + 1] != 'B') + goto out_err; + else + goto mega; + case 'm': + if (str[i + 1] != 'b') + goto out_err; +mega: + unit = K * K; + break; + case 'G': + if (str[i + 1] != 'B') + goto out_err; + else + goto giga; + case 'g': + if (str[i + 1] != 'b') + goto out_err; +giga: + unit = K * K * K; + break; + case 'T': + if (str[i + 1] != 'B') + goto out_err; + else + goto tera; + case 't': + if (str[i + 1] != 'b') + goto out_err; +tera: + unit = K * K * K * K; + break; + case '\0': /* only specified figures */ + unit = 1; + break; + default: + if (!isdigit(str[i])) + goto out_err; + break; + } + } + + length = atoll(str) * unit; + goto out; + +out_err: + length = -1; +out: + return length; +} diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 2c84bf6..e50b07f 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -5,6 +5,7 @@ int hex2u64(const char *ptr, u64 *val); char *strxfrchar(char *s, char from, char to); +s64 perf_atoll(const char *str); #define _STR(x) #x #define STR(x) _STR(x) -- cgit v0.10.2 From 7255fe2a42c612f2b8fe4c347f0a5f0c97d85a46 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Sun, 15 Nov 2009 12:05:08 -0200 Subject: perf stat: Do not print ratio when task-clock event is not counted The ratio between the number of events and the time elapsed makes sense only if task-clock event is counted. Otherwise it will be simply a (confusing) # 0.000 M/sec This patch outputs the ratio only if task-clock event is counted. Some test examples of before and after: Before: [lucas@skywalker linux.trees.git]$ sudo perf stat -e branch-misses -a -- sleep 1 Performance counter stats for 'sleep 1': 1367818 branch-misses # 0.000 M/sec 1.001494325 seconds time elapsed After (without task-clock): [lucas@skywalker perf]$ sudo ./perf stat -e branch-misses -a -- sleep 1 Performance counter stats for 'sleep 1': 1135044 branch-misses 1.001370775 seconds time elapsed After (with task-clock): [lucas@skywalker perf]$ sudo ./perf stat -e branch-misses -e task-clock -a -- sleep 1 Performance counter stats for 'sleep 1': 1070111 branch-misses # 0.534 M/sec 2002.730893 task-clock-msecs # 1.999 CPUs 1.001640292 seconds time elapsed Signed-off-by: Lucas De Marchi Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091115140507.GB21561@skywalker.lan> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c6df377..c70d720 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -357,7 +357,8 @@ static void abs_printout(int counter, double avg) ratio = avg / total; fprintf(stderr, " # %10.3f IPC ", ratio); - } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter)) { + } else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) && + runtime_branches_stats.n != 0) { total = avg_stats(&runtime_branches_stats); if (total) @@ -365,7 +366,7 @@ static void abs_printout(int counter, double avg) fprintf(stderr, " # %10.3f %% ", ratio); - } else { + } else if (runtime_nsecs_stats.n != 0) { total = avg_stats(&runtime_nsecs_stats); if (total) -- cgit v0.10.2 From 62ad33f67003b9a7b6013f0511579b9805e11626 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 16 Nov 2009 11:44:30 +0900 Subject: x86: Don't put iommu_shutdown_noop() in init section It causes kernel panic on shutdown or reboot. Signed-off-by: Hiroshi Shimamoto Acked-by: FUJITA Tomonori LKML-Reference: <4B00BC8E.50801@ct.jp.nec.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 80f3ae2..d11c5ff 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -20,7 +20,7 @@ void __cpuinit x86_init_noop(void) { } void __init x86_init_uint_noop(unsigned int unused) { } void __init x86_init_pgd_noop(pgd_t *unused) { } int __init iommu_init_noop(void) { return 0; } -void __init iommu_shutdown_noop(void) { } +void iommu_shutdown_noop(void) { } /* * The platform setup functions are preset with the default functions -- cgit v0.10.2 From 047106adcc85e3023da210143a6ab8a55df9e0fc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2009 10:28:09 +0100 Subject: sched: Sched_rt_periodic_timer vs cpu hotplug Heiko reported a case where a timer interrupt managed to reference a root_domain structure that was already freed by a concurrent hot-un-plug operation. Solve this like the regular sched_domain stuff is also synchronized, by adding a synchronize_sched() stmt to the free path, this ensures that a root_domain stays present for any atomic section that could have observed it. Reported-by: Heiko Carstens Signed-off-by: Peter Zijlstra Acked-by: Heiko Carstens Cc: Gregory Haskins Cc: Siddha Suresh B Cc: Martin Schwidefsky LKML-Reference: <1258363873.26714.83.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index cea2bea..3c91f11 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7912,6 +7912,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) static void free_rootdomain(struct root_domain *rd) { + synchronize_sched(); + cpupri_cleanup(&rd->cpupri); free_cpumask_var(rd->rto_mask); -- cgit v0.10.2 From 559fdc3c1b624edb1933a875022fe7e27934d11c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 16 Nov 2009 12:45:14 +0100 Subject: perf_event: Optimize perf_output_lock() The purpose of perf_output_{un,}lock() is to: 1) avoid publishing incomplete data [ possible when publishing a head that is ahead of an entry that is still being written ] 2) guarantee fwd progress [ a simple refcount on pending writers doesn't need to drop to 0, making it so would end up implementing something like forced quiecent states of RCU ] To satisfy the above without undue complexity it serializes between CPUs, this means that a pending writer can only be the same cpu in a nested context, and since (under normal operation) a cpu always makes progress we're good -- if the head is only published when the bottom most writer completes. Now we don't need to disable IRQs in order to serialize between CPUs, disabling preemption ought to be sufficient, esp since we already deal with nesting due to NMIs. This avoids potentially expensive (and needless) local IRQ disable/enable ops. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <1258373161.26714.254.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index df4e73e..7f87563 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -714,7 +714,6 @@ struct perf_output_handle { int nmi; int sample; int locked; - unsigned long flags; }; #ifdef CONFIG_PERF_EVENTS diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 6f4ed3b..3256e36 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2674,20 +2674,21 @@ static void perf_output_wakeup(struct perf_output_handle *handle) static void perf_output_lock(struct perf_output_handle *handle) { struct perf_mmap_data *data = handle->data; - int cpu; + int cur, cpu = get_cpu(); handle->locked = 0; - local_irq_save(handle->flags); - cpu = smp_processor_id(); - - if (in_nmi() && atomic_read(&data->lock) == cpu) - return; + for (;;) { + cur = atomic_cmpxchg(&data->lock, -1, cpu); + if (cur == -1) { + handle->locked = 1; + break; + } + if (cur == cpu) + break; - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) cpu_relax(); - - handle->locked = 1; + } } static void perf_output_unlock(struct perf_output_handle *handle) @@ -2733,7 +2734,7 @@ again: if (atomic_xchg(&data->wakeup, 0)) perf_output_wakeup(handle); out: - local_irq_restore(handle->flags); + put_cpu(); } void perf_output_copy(struct perf_output_handle *handle, -- cgit v0.10.2 From 3c93ca00eeeb774c7dd666cc7286a9e90c53e998 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Nov 2009 15:42:18 +0100 Subject: x86: Add missing might_fault() checks to copy_{to,from}_user() On x86-64, copy_[to|from]_user() rely on assembly routines that never call might_fault(), making us missing various lockdep checks. This doesn't apply to __copy_from,to_user() that explicitly handle these calls, neither is it a problem in x86-32 where copy_to,from_user() rely on the "__" prefixed versions that also call might_fault(). Signed-off-by: Frederic Weisbecker Cc: Arjan van de Ven Cc: Linus Torvalds Cc: Nick Piggin Cc: Peter Zijlstra LKML-Reference: <1258382538-30979-1-git-send-email-fweisbec@gmail.com> [ v2: fix module export ] Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 7adebac..46324c6 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -19,7 +19,7 @@ __must_check unsigned long copy_user_generic(void *to, const void *from, unsigned len); __must_check unsigned long -copy_to_user(void __user *to, const void *from, unsigned len); +_copy_to_user(void __user *to, const void *from, unsigned len); __must_check unsigned long _copy_from_user(void *to, const void __user *from, unsigned len); __must_check unsigned long @@ -32,6 +32,7 @@ static inline unsigned long __must_check copy_from_user(void *to, int sz = __compiletime_object_size(to); int ret = -EFAULT; + might_fault(); if (likely(sz == -1 || sz >= n)) ret = _copy_from_user(to, from, n); #ifdef CONFIG_DEBUG_VM @@ -41,6 +42,13 @@ static inline unsigned long __must_check copy_from_user(void *to, return ret; } +static __always_inline __must_check +int copy_to_user(void __user *dst, const void *src, unsigned size) +{ + might_fault(); + + return _copy_to_user(dst, src, size); +} static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index cd54276..a102976 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -31,7 +31,7 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(copy_user_generic); EXPORT_SYMBOL(__copy_user_nocache); EXPORT_SYMBOL(_copy_from_user); -EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(_copy_to_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index 3936998..cf889d4 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -65,7 +65,7 @@ .endm /* Standard copy_to_user with segment limit checking */ -ENTRY(copy_to_user) +ENTRY(_copy_to_user) CFI_STARTPROC GET_THREAD_INFO(%rax) movq %rdi,%rcx @@ -75,7 +75,7 @@ ENTRY(copy_to_user) jae bad_to_user ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC -ENDPROC(copy_to_user) +ENDPROC(_copy_to_user) /* Standard copy_from_user with segment limit checking */ ENTRY(_copy_from_user) -- cgit v0.10.2 From e79c65a97c01d5da4317f44f9f98b3814e091a43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 16 Nov 2009 18:14:26 +0300 Subject: x86: io-apic: IO-APIC MMIO should not fail on resource insertion If IO-APIC base address is 1K aligned we should not fail on resourse insertion procedure. For this sake we define IO_APIC_SLOT_SIZE constant which should cover all IO-APIC direct accessible registers. An example of a such configuration is there http://marc.info/?l=linux-kernel&m=118114792006520 | | Quoting the message | | IOAPIC[0]: apic_id 2, version 32, address 0xfec00000, GSI 0-23 | IOAPIC[1]: apic_id 3, version 32, address 0xfec80000, GSI 24-47 | IOAPIC[2]: apic_id 4, version 32, address 0xfec80400, GSI 48-71 | IOAPIC[3]: apic_id 5, version 32, address 0xfec84000, GSI 72-95 | IOAPIC[4]: apic_id 8, version 32, address 0xfec84400, GSI 96-119 | Reported-by: "Maciej W. Rozycki" Signed-off-by: Cyrill Gorcunov Acked-by: Yinghai Lu LKML-Reference: <20091116151426.GC5653@lenovo> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 3b62da9..7fe3b30 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -11,6 +11,12 @@ #define IO_APIC_DEFAULT_PHYS_BASE 0xfec00000 #define APIC_DEFAULT_PHYS_BASE 0xfee00000 +/* + * This is the IO-APIC register space as specified + * by Intel docs: + */ +#define IO_APIC_SLOT_SIZE 1024 + #define APIC_ID 0x20 #define APIC_LVR 0x30 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 20ea839..ff23719 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4100,18 +4100,17 @@ void __init ioapic_init_mappings(void) #ifdef CONFIG_X86_32 fake_ioapic_page: #endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), + ioapic_phys); idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + PAGE_SIZE-1; + ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } } -- cgit v0.10.2 From 82164161679c448f33092945ea97cb547a13683a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 13:48:11 -0200 Subject: perf symbols: Call the symbol filter in dso__synthesize_plt_symbols() We need to pass the symbol to the filter so that, for instance, 'perf top' can do filtering and also set the private area it manages, setting the ->map pointer, etc. I found this while running 'perf top' on a machine where hits happened on PLT symbols, where ->map wasn't being set up and segfaults thus happened. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258386491-20278-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9c286db..93e4b52 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -281,6 +281,11 @@ static int kernel_maps__load_all_kallsyms(void) if (sym == NULL) goto out_delete_line; + /* + * We will pass the symbols to the filter later, in + * kernel_maps__split_kallsyms, when we have split the + * maps per module + */ dso__insert_symbol(kernel_map->dso, sym); } @@ -555,7 +560,8 @@ static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). */ -static int dso__synthesize_plt_symbols(struct dso *self) +static int dso__synthesize_plt_symbols(struct dso *self, struct map *map, + symbol_filter_t filter) { uint32_t nr_rel_entries, idx; GElf_Sym sym; @@ -643,8 +649,12 @@ static int dso__synthesize_plt_symbols(struct dso *self) if (!f) goto out_elf_end; - dso__insert_symbol(self, f); - ++nr; + if (filter && filter(map, f)) + symbol__delete(f); + else { + dso__insert_symbol(self, f); + ++nr; + } } } else if (shdr_rel_plt.sh_type == SHT_REL) { GElf_Rel pos_mem, *pos; @@ -661,8 +671,12 @@ static int dso__synthesize_plt_symbols(struct dso *self) if (!f) goto out_elf_end; - dso__insert_symbol(self, f); - ++nr; + if (filter && filter(map, f)) + symbol__delete(f); + else { + dso__insert_symbol(self, f); + ++nr; + } } } @@ -1050,7 +1064,7 @@ compare_build_id: goto more; if (ret > 0) { - int nr_plt = dso__synthesize_plt_symbols(self); + int nr_plt = dso__synthesize_plt_symbols(self, map, filter); if (nr_plt > 0) ret += nr_plt; } -- cgit v0.10.2 From 84fe8488ade7922afa9f3aa77c22d2d92beb9660 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 16:32:41 -0200 Subject: perf symbols: Pass the offset to perf_header__read_build_ids() Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258396365-29217-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index aacb814..14cb846 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -70,11 +70,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) } } -int perf_header__read_build_ids(int input, off_t size) +int perf_header__read_build_ids(int input, off_t offset, off_t size) { struct build_id_event bev; char filename[PATH_MAX]; - off_t offset = lseek(input, 0, SEEK_CUR); off_t limit = offset + size; int err = -1; diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h index 20b4037..ae036ec 100644 --- a/tools/perf/util/data_map.h +++ b/tools/perf/util/data_map.h @@ -27,6 +27,6 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, int full_paths, int *cwdlen, char **cwd); -int perf_header__read_build_ids(int input, off_t file_size); +int perf_header__read_build_ids(int input, off_t offset, off_t file_size); #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ebed4f4..ca0d657 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -359,7 +359,7 @@ static void perf_header__adds_read(struct perf_header *self, int fd) buildid_sec = &feat_sec[idx++]; lseek(fd, buildid_sec->offset, SEEK_SET); - if (perf_header__read_build_ids(fd, buildid_sec->size)) + if (perf_header__read_build_ids(fd, buildid_sec->offset, buildid_sec->size)) pr_debug("failed to read buildids, continuing...\n"); } -- cgit v0.10.2 From 8f41146aedf803856fb6477056e3960cb9ba8f9c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 16:32:42 -0200 Subject: perf tools: Debug.h needs to include event.h for event_t Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258396365-29217-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index e8b18a1..c6c24c5 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -2,6 +2,8 @@ #ifndef __PERF_DEBUG_H #define __PERF_DEBUG_H +#include "event.h" + extern int verbose; extern int dump_trace; -- cgit v0.10.2 From 37562eac3767c7f07bb1a1329708ff6453e47570 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 16:32:43 -0200 Subject: perf tools: Generalize perf_header__adds_read() Renaming it to perf_header__process_sections() and passing a callback to handle each feature. The next changesets will introduce 'perf buildid-list' that will handle just the HEADER_BUILD_ID table, ignoring all the other features. Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258396365-29217-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ca0d657..d8416f0 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -128,26 +128,11 @@ static const char *__perf_magic = "PERFFILE"; #define PERF_MAGIC (*(u64 *)__perf_magic) -struct perf_file_section { - u64 offset; - u64 size; -}; - struct perf_file_attr { struct perf_event_attr attr; struct perf_file_section ids; }; -struct perf_file_header { - u64 magic; - u64 size; - u64 attr_size; - struct perf_file_section attrs; - struct perf_file_section data; - struct perf_file_section event_types; - DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); -}; - void perf_header__set_feat(struct perf_header *self, int feat) { set_bit(feat, self->adds_features); @@ -324,21 +309,23 @@ static void do_read(int fd, void *buf, size_t size) } } -static void perf_header__adds_read(struct perf_header *self, int fd) +int perf_header__process_sections(struct perf_header *self, int fd, + int (*process)(struct perf_file_section *self, + int feat, int fd)) { struct perf_file_section *feat_sec; int nr_sections; int sec_size; int idx = 0; - + int err = 0, feat = 1; nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); if (!nr_sections) - return; + return 0; feat_sec = calloc(sizeof(*feat_sec), nr_sections); if (!feat_sec) - die("No memory"); + return -1; sec_size = sizeof(*feat_sec) * nr_sections; @@ -346,25 +333,73 @@ static void perf_header__adds_read(struct perf_header *self, int fd) do_read(fd, feat_sec, sec_size); - if (perf_header__has_feat(self, HEADER_TRACE_INFO)) { - struct perf_file_section *trace_sec; + while (idx < nr_sections && feat < HEADER_LAST_FEATURE) { + if (perf_header__has_feat(self, feat)) { + struct perf_file_section *sec = &feat_sec[idx++]; - trace_sec = &feat_sec[idx++]; - lseek(fd, trace_sec->offset, SEEK_SET); - trace_report(fd); + err = process(sec, feat, fd); + if (err < 0) + break; + } + ++feat; } - if (perf_header__has_feat(self, HEADER_BUILD_ID)) { - struct perf_file_section *buildid_sec; + free(feat_sec); + return err; +}; - buildid_sec = &feat_sec[idx++]; - lseek(fd, buildid_sec->offset, SEEK_SET); - if (perf_header__read_build_ids(fd, buildid_sec->offset, buildid_sec->size)) - pr_debug("failed to read buildids, continuing...\n"); +int perf_file_header__read(struct perf_file_header *self, + struct perf_header *ph, int fd) +{ + lseek(fd, 0, SEEK_SET); + do_read(fd, self, sizeof(*self)); + + if (self->magic != PERF_MAGIC || + self->attr_size != sizeof(struct perf_file_attr)) + return -1; + + if (self->size != sizeof(*self)) { + /* Support the previous format */ + if (self->size == offsetof(typeof(*self), adds_features)) + bitmap_zero(self->adds_features, HEADER_FEAT_BITS); + else + return -1; } - free(feat_sec); -}; + memcpy(&ph->adds_features, &self->adds_features, + sizeof(self->adds_features)); + + ph->event_offset = self->event_types.offset; + ph->event_size = self->event_types.size; + ph->data_offset = self->data.offset; + ph->data_size = self->data.size; + return 0; +} + +static int perf_file_section__process(struct perf_file_section *self, + int feat, int fd) +{ + if (lseek(fd, self->offset, SEEK_SET) < 0) { + pr_debug("Failed to lseek to %Ld offset for feature %d, " + "continuing...\n", self->offset, feat); + return 0; + } + + switch (feat) { + case HEADER_TRACE_INFO: + trace_report(fd); + break; + + case HEADER_BUILD_ID: + if (perf_header__read_build_ids(fd, self->offset, self->size)) + pr_debug("Failed to read buildids, continuing...\n"); + break; + default: + pr_debug("unknown feature %d, continuing...\n", feat); + } + + return 0; +} struct perf_header *perf_header__read(int fd) { @@ -372,23 +407,11 @@ struct perf_header *perf_header__read(int fd) struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; - int nr_attrs, nr_ids, i, j; - lseek(fd, 0, SEEK_SET); - do_read(fd, &f_header, sizeof(f_header)); - - if (f_header.magic != PERF_MAGIC || - f_header.attr_size != sizeof(f_attr)) + if (perf_file_header__read(&f_header, self, fd) < 0) die("incompatible file format"); - if (f_header.size != sizeof(f_header)) { - /* Support the previous format */ - if (f_header.size == offsetof(typeof(f_header), adds_features)) - bitmap_zero(f_header.adds_features, HEADER_FEAT_BITS); - else - die("incompatible file format"); - } nr_attrs = f_header.attrs.size / sizeof(f_attr); lseek(fd, f_header.attrs.offset, SEEK_SET); @@ -422,15 +445,7 @@ struct perf_header *perf_header__read(int fd) event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } - memcpy(&self->adds_features, &f_header.adds_features, sizeof(f_header.adds_features)); - - self->event_offset = f_header.event_types.offset; - self->event_size = f_header.event_types.size; - - self->data_offset = f_header.data.offset; - self->data_size = f_header.data.size; - - perf_header__adds_read(self, fd); + perf_header__process_sections(self, fd, perf_file_section__process); lseek(fd, self->data_offset, SEEK_SET); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index a22d70b..f1b3bf7 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -15,11 +15,34 @@ struct perf_header_attr { off_t id_offset; }; -#define HEADER_TRACE_INFO 1 -#define HEADER_BUILD_ID 2 +enum { + HEADER_TRACE_INFO = 1, + HEADER_BUILD_ID, + HEADER_LAST_FEATURE, +}; #define HEADER_FEAT_BITS 256 +struct perf_file_section { + u64 offset; + u64 size; +}; + +struct perf_file_header { + u64 magic; + u64 size; + u64 attr_size; + struct perf_file_section attrs; + struct perf_file_section data; + struct perf_file_section event_types; + DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); +}; + +struct perf_header; + +int perf_file_header__read(struct perf_file_header *self, + struct perf_header *ph, int fd); + struct perf_header { int frozen; int attrs, size; @@ -54,4 +77,8 @@ bool perf_header__has_feat(const struct perf_header *self, int feat); struct perf_header *perf_header__new(void); +int perf_header__process_sections(struct perf_header *self, int fd, + int (*process)(struct perf_file_section *self, + int feat, int fd)); + #endif /* __PERF_HEADER_H */ -- cgit v0.10.2 From 9e03eb2d512e7f3a1e562d4b922aa8b1891750b6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 16:32:44 -0200 Subject: perf tools: Introduce dsos__fprintf_buildid To print the buildids in the list of dsos. Will be used by 'perf buildid-list' Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258396365-29217-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 93e4b52..53de9c4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -212,14 +212,21 @@ int build_id__sprintf(u8 *self, int len, char *bf) return raw - self; } -size_t dso__fprintf(struct dso *self, FILE *fp) +size_t dso__fprintf_buildid(struct dso *self, FILE *fp) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - struct rb_node *nd; - size_t ret; build_id__sprintf(self->build_id, sizeof(self->build_id), sbuild_id); - ret = fprintf(fp, "dso: %s (%s)\n", self->short_name, sbuild_id); + return fprintf(fp, "%s", sbuild_id); +} + +size_t dso__fprintf(struct dso *self, FILE *fp) +{ + struct rb_node *nd; + size_t ret = fprintf(fp, "dso: %s (", self->short_name); + + ret += dso__fprintf_buildid(self, fp); + ret += fprintf(fp, ")\n"); for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); @@ -1428,6 +1435,21 @@ void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } +size_t dsos__fprintf_buildid(FILE *fp) +{ + struct dso *pos; + size_t ret = 0; + + list_for_each_entry(pos, &dsos, node) { + ret += dso__fprintf_buildid(pos, fp); + if (verbose) + ret += fprintf(fp, " %s\n", pos->long_name); + else + ret += fprintf(fp, "\n"); + } + return ret; +} + int load_kernel(symbol_filter_t filter) { if (dsos__load_kernel(vmlinux_name, filter, modules) <= 0) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0a34a54..51c5a4a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -80,7 +80,9 @@ int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, int modules); struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); void dsos__fprintf(FILE *fp); +size_t dsos__fprintf_buildid(FILE *fp); +size_t dso__fprintf_buildid(struct dso *self, FILE *fp); size_t dso__fprintf(struct dso *self, FILE *fp); char dso__symtab_origin(const struct dso *self); void dso__set_build_id(struct dso *self, void *build_id); -- cgit v0.10.2 From c34984b2bbc77596c97c333539bffc90d2033178 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 16:32:45 -0200 Subject: perf buildid-list: New plumbing command With this we can list the buildids in a perf.data file so that we can pipe them to other, distro specific tools that from the buildids can figure out separate packages (foo-debuginfo) where we can find the matching symtabs so that perf report can do its job. E.g: [acme@doppio linux-2.6-tip]$ perf buildid-list | head -5 8e08b117e5458ad3f85da16d42d0fc5cd21c5869 520c2387a587cc5acfcf881e27dba1caaeab4b1f ec8dd400904ddfcac8b1c343263a790f977159dc 7caedbca5a6d8ab39a7fe44bd28c07d3e14a3f3f 379bb828fd08859dbea73279f04abefabc95a6a3 [acme@doppio linux-2.6-tip]$ perf buildid-list -v | head -5 8e08b117e5458ad3f85da16d42d0fc5cd21c5869 /sbin/init 520c2387a587cc5acfcf881e27dba1caaeab4b1f /lib64/ld-2.10.1.so ec8dd400904ddfcac8b1c343263a790f977159dc /lib64/libc-2.10.1.so 7caedbca5a6d8ab39a7fe44bd28c07d3e14a3f3f /sbin/udevd 379bb828fd08859dbea73279f04abefabc95a6a3 /lib64/libdl-2.10.1.so [acme@doppio linux-2.6-tip]$ Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258396365-29217-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt new file mode 100644 index 0000000..abab34e --- /dev/null +++ b/tools/perf/Documentation/perf-buildid-list.txt @@ -0,0 +1,34 @@ +perf-buildid-list(1) +==================== + +NAME +---- +perf-buildid-list - List the buildids in a perf.data file + +SYNOPSIS +-------- +[verse] +'perf buildid-list ' + +DESCRIPTION +----------- +This command displays the buildids found in a perf.data file, so that other +tools can be used to fetch packages with matching symbol tables for use by +perf report. + +OPTIONS +------- +-i:: +--input=:: + Input file name. (default: perf.data) +-f:: +--force:: + Don't do ownership validation. +-v:: +--verbose:: + Be more verbose, showing the name of the DSOs after the buildids. + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-top[1], +linkperf:perf-report[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f7cd896..46a58a8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -430,6 +430,7 @@ BUILTIN_OBJS += bench/sched-pipe.o BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-sched.o +BUILTIN_OBJS += builtin-buildid-list.o BUILTIN_OBJS += builtin-list.o BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-report.o diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c new file mode 100644 index 0000000..2e377e1 --- /dev/null +++ b/tools/perf/builtin-buildid-list.c @@ -0,0 +1,116 @@ +/* + * builtin-buildid-list.c + * + * Builtin buildid-list command: list buildids in perf.data + * + * Copyright (C) 2009, Red Hat Inc. + * Copyright (C) 2009, Arnaldo Carvalho de Melo + */ +#include "builtin.h" +#include "perf.h" +#include "util/cache.h" +#include "util/data_map.h" +#include "util/debug.h" +#include "util/header.h" +#include "util/parse-options.h" +#include "util/symbol.h" + +static char const *input_name = "perf.data"; +static int force; + +static const char *const buildid_list_usage[] = { + "perf report []", + NULL +}; + +static const struct option options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_END() +}; + +static int perf_file_section__process_buildids(struct perf_file_section *self, + int feat, int fd) +{ + if (feat != HEADER_BUILD_ID) + return 0; + + if (lseek(fd, self->offset, SEEK_SET) < 0) { + pr_warning("Failed to lseek to %Ld offset for buildids!\n", + self->offset); + return -1; + } + + if (perf_header__read_build_ids(fd, self->offset, self->size)) { + pr_warning("Failed to read buildids!\n"); + return -1; + } + + return 0; +} + +static int __cmd_buildid_list(void) +{ + int err = -1; + struct perf_header *header; + struct perf_file_header f_header; + struct stat input_stat; + int input = open(input_name, O_RDONLY); + + if (input < 0) { + pr_err("failed to open file: %s", input_name); + if (!strcmp(input_name, "perf.data")) + pr_err(" (try 'perf record' first)"); + pr_err("\n"); + goto out; + } + + err = fstat(input, &input_stat); + if (err < 0) { + perror("failed to stat file"); + goto out_close; + } + + if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { + pr_err("file %s not owned by current user or root\n", + input_name); + goto out_close; + } + + if (!input_stat.st_size) { + pr_info("zero-sized file, nothing to do!\n"); + goto out_close; + } + + err = -1; + header = perf_header__new(); + if (header == NULL) + goto out_close; + + if (perf_file_header__read(&f_header, header, input) < 0) { + pr_warning("incompatible file format"); + goto out_close; + } + + err = perf_header__process_sections(header, input, + perf_file_section__process_buildids); + + if (err < 0) + goto out_close; + + dsos__fprintf_buildid(stdout); +out_close: + close(input); +out: + return err; +} + +int cmd_buildid_list(int argc, const char **argv, const char *prefix __used) +{ + argc = parse_options(argc, argv, options, buildid_list_usage, 0); + setup_pager(); + return __cmd_buildid_list(); +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index f0cd5b1..e97954a 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -16,6 +16,7 @@ extern int check_pager_config(const char *cmd); extern int cmd_annotate(int argc, const char **argv, const char *prefix); extern int cmd_bench(int argc, const char **argv, const char *prefix); +extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); extern int cmd_help(int argc, const char **argv, const char *prefix); extern int cmd_sched(int argc, const char **argv, const char *prefix); extern int cmd_list(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 981c40b..d37b16c 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -4,6 +4,7 @@ # perf-annotate mainporcelain common perf-bench mainporcelain common +perf-buildid-list mainporcelain common perf-list mainporcelain common perf-sched mainporcelain common perf-record mainporcelain common diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 8936786..53359eb 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -287,6 +287,7 @@ static void handle_internal_command(int argc, const char **argv) static struct cmd_struct commands[] = { { "help", cmd_help, 0 }, { "list", cmd_list, 0 }, + { "buildid-list", cmd_buildid_list, 0 }, { "record", cmd_record, 0 }, { "report", cmd_report, 0 }, { "bench", cmd_bench, 0 }, -- cgit v0.10.2 From d65ff75fbe6f8ac7c17f18e4108521898468822c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Nov 2009 18:06:18 -0500 Subject: x86: Add verbose option to insn decoder test Add verbose option to insn decoder test. This dumps decoded instruction when building kernel with V=1. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston Cc: Stephen Rothwell LKML-Reference: <20091116230618.5250.18762.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 5e295d9..4688f90 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -1,6 +1,13 @@ PHONY += posttest + +ifeq ($(KBUILD_VERBOSE),1) + postest_verbose = -v +else + postest_verbose = +endif + quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(CONFIG_64BIT) + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len -$(CONFIG_64BIT) $(posttest_verbose) posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index 376d338..5743e51 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -20,6 +20,7 @@ #include #include #include +#include #define unlikely(cond) (cond) @@ -36,11 +37,16 @@ */ const char *prog; +static int verbose; +static int x86_64; static void usage(void) { fprintf(stderr, "Usage: objdump -d a.out | awk -f distill.awk |" - " %s [y|n](64bit flag)\n", prog); + " %s [-y|-n] [-v] \n", prog); + fprintf(stderr, "\t-y 64bit mode\n"); + fprintf(stderr, "\t-n 32bit mode\n"); + fprintf(stderr, "\t-v verbose mode\n"); exit(1); } @@ -50,6 +56,56 @@ static void malformed_line(const char *line, int line_nr) exit(3); } +static void dump_field(FILE *fp, const char *name, const char *indent, + struct insn_field *field) +{ + fprintf(fp, "%s.%s = {\n", indent, name); + fprintf(fp, "%s\t.value = %d, bytes[] = {%x, %x, %x, %x},\n", + indent, field->value, field->bytes[0], field->bytes[1], + field->bytes[2], field->bytes[3]); + fprintf(fp, "%s\t.got = %d, .nbytes = %d},\n", indent, + field->got, field->nbytes); +} + +static void dump_insn(FILE *fp, struct insn *insn) +{ + fprintf(fp, "Instruction = { \n"); + dump_field(fp, "prefixes", "\t", &insn->prefixes); + dump_field(fp, "rex_prefix", "\t", &insn->rex_prefix); + dump_field(fp, "vex_prefix", "\t", &insn->vex_prefix); + dump_field(fp, "opcode", "\t", &insn->opcode); + dump_field(fp, "modrm", "\t", &insn->modrm); + dump_field(fp, "sib", "\t", &insn->sib); + dump_field(fp, "displacement", "\t", &insn->displacement); + dump_field(fp, "immediate1", "\t", &insn->immediate1); + dump_field(fp, "immediate2", "\t", &insn->immediate2); + fprintf(fp, "\t.attr = %x, .opnd_bytes = %d, .addr_bytes = %d,\n", + insn->attr, insn->opnd_bytes, insn->addr_bytes); + fprintf(fp, "\t.length = %d, .x86_64 = %d, .kaddr = %p}\n", + insn->length, insn->x86_64, insn->kaddr); +} + +static void parse_args(int argc, char **argv) +{ + int c; + prog = argv[0]; + while ((c = getopt(argc, argv, "ynv")) != -1) { + switch (c) { + case 'y': + x86_64 = 1; + break; + case 'n': + x86_64 = 0; + break; + case 'v': + verbose = 1; + break; + default: + usage(); + } + } +} + #define BUFSIZE 256 int main(int argc, char **argv) @@ -57,15 +113,9 @@ int main(int argc, char **argv) char line[BUFSIZE]; unsigned char insn_buf[16]; struct insn insn; - int insns = 0; - int x86_64 = 0; - - prog = argv[0]; - if (argc > 2) - usage(); + int insns = 0, c; - if (argc == 2 && argv[1][0] == 'y') - x86_64 = 1; + parse_args(argc, argv); while (fgets(line, BUFSIZE, stdin)) { char copy[BUFSIZE], *s, *tab1, *tab2; @@ -97,8 +147,10 @@ int main(int argc, char **argv) if (insn.length != nb) { fprintf(stderr, "Error: %s", line); fprintf(stderr, "Error: objdump says %d bytes, but " - "insn_get_length() says %d (attr:%x)\n", nb, - insn.length, insn.attr); + "insn_get_length() says %d\n", nb, + insn.length); + if (verbose) + dump_insn(stderr, &insn); exit(2); } } -- cgit v0.10.2 From 35039eb6b199749943547c8572be6604edf00229 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Nov 2009 18:06:24 -0500 Subject: x86: Show symbol name if insn decoder test failed Show symbol name if insn decoder test find a difference. This will help us to find out where the issue is. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston Cc: Stephen Rothwell LKML-Reference: <20091116230624.5250.49813.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/tools/distill.awk b/arch/x86/tools/distill.awk index d433619..c13c0ee 100644 --- a/arch/x86/tools/distill.awk +++ b/arch/x86/tools/distill.awk @@ -15,6 +15,11 @@ BEGIN { fwait_str="9b\tfwait" } +/^ *[0-9a-f]+ <[^>]*>:/ { + # Symbol entry + printf("%s%s\n", $2, $1) +} + /^ *[0-9a-f]+:/ { if (split($0, field, "\t") < 3) { # This is a continuation of the same insn. diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index 5743e51..af75e07 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -110,7 +110,7 @@ static void parse_args(int argc, char **argv) int main(int argc, char **argv) { - char line[BUFSIZE]; + char line[BUFSIZE], sym[BUFSIZE] = ""; unsigned char insn_buf[16]; struct insn insn; int insns = 0, c; @@ -122,6 +122,12 @@ int main(int argc, char **argv) int nb = 0; unsigned int b; + if (line[0] == '<') { + /* Symbol line */ + strcpy(sym, line); + continue; + } + insns++; memset(insn_buf, 0, 16); strcpy(copy, line); @@ -145,6 +151,8 @@ int main(int argc, char **argv) insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); if (insn.length != nb) { + fprintf(stderr, "Error: %s found a difference at %s\n", + prog, sym); fprintf(stderr, "Error: %s", line); fprintf(stderr, "Error: objdump says %d bytes, but " "insn_get_length() says %d\n", nb, -- cgit v0.10.2 From dc79c0fc08a94b857aed446bfb47cdfde529400c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 19:30:26 -0200 Subject: perf tools: Don't die in perf_header_attr__new() We really should propagate such kinds of errors so that users of these library functions decide what to do in such cases instead of exiting in random places like now. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258407027-384-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 04f335e..4c03bb7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -220,7 +220,8 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n h_attr = header->attr[nr]; } else { h_attr = perf_header_attr__new(a); - perf_header__add_attr(header, h_attr); + if (h_attr != NULL) + perf_header__add_attr(header, h_attr); } return h_attr; @@ -308,6 +309,8 @@ try_again: } h_attr = get_header_attr(attr, counter); + if (h_attr == NULL) + die("nomem\n"); if (!file_new) { if (memcmp(&h_attr->attr, attr, sizeof(*attr))) { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d8416f0..2f07a23 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -19,16 +19,16 @@ struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr) { struct perf_header_attr *self = malloc(sizeof(*self)); - if (!self) - die("nomem"); - - self->attr = *attr; - self->ids = 0; - self->size = 1; - self->id = malloc(sizeof(u64)); - - if (!self->id) - die("nomem"); + if (self != NULL) { + self->attr = *attr; + self->ids = 0; + self->size = 1; + self->id = malloc(sizeof(u64)); + if (self->id == NULL) { + free(self); + self = NULL; + } + } return self; } @@ -423,6 +423,8 @@ struct perf_header *perf_header__read(int fd) tmp = lseek(fd, 0, SEEK_CUR); attr = perf_header_attr__new(&f_attr.attr); + if (attr == NULL) + die("nomem"); nr_ids = f_attr.ids.size / sizeof(u64); lseek(fd, f_attr.ids.offset, SEEK_SET); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f1b3bf7..0cbd4c9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -64,9 +64,7 @@ void perf_header__add_attr(struct perf_header *self, void perf_header__push_event(u64 id, const char *name); char *perf_header__find_event(u64 id); - -struct perf_header_attr * -perf_header_attr__new(struct perf_event_attr *attr); +struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); -- cgit v0.10.2 From 3b6ed98895b0fccd8c387f3fc44016fb922c0658 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 19:30:27 -0200 Subject: perf top: Use all the lines in the screen By querying the current number of rows, if the user specifies the number of entries, use that instead. If the user uses the 'e' command to change the number of lines 0 will mean do it automatically, any other number disables the auto resizing. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258407027-384-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6613f98..3af9520 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -60,7 +60,7 @@ static int system_wide = 0; static int default_interval = 0; static int count_filter = 5; -static int print_entries = 15; +static int print_entries; static int target_pid = -1; static int inherit = 0; @@ -115,6 +115,36 @@ struct sym_entry { * Source functions */ +/* most GUI terminals set LINES (although some don't export it) */ +static int term_rows(void) +{ + char *lines_string = getenv("LINES"); + int n_lines; + + if (lines_string && (n_lines = atoi(lines_string)) > 0) + return n_lines; +#ifdef TIOCGWINSZ + else { + struct winsize ws; + if (!ioctl(1, TIOCGWINSZ, &ws) && ws.ws_row) + return ws.ws_row; + } +#endif + return 25; +} + +static void update_print_entries(void) +{ + print_entries = term_rows(); + if (print_entries > 9) + print_entries -= 9; +} + +static void sig_winch_handler(int sig __used) +{ + update_print_entries(); +} + static void parse_source(struct sym_entry *syme) { struct symbol *sym; @@ -668,6 +698,11 @@ static void handle_keypress(int c) break; case 'e': prompt_integer(&print_entries, "Enter display entries (lines)"); + if (print_entries == 0) { + update_print_entries(); + signal(SIGWINCH, sig_winch_handler); + } else + signal(SIGWINCH, SIG_DFL); break; case 'E': if (nr_counters > 1) { @@ -1228,5 +1263,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (target_pid != -1 || profile_cpu != -1) nr_cpus = 1; + if (print_entries == 0) { + update_print_entries(); + signal(SIGWINCH, sig_winch_handler); + } + return __cmd_top(); } -- cgit v0.10.2 From 8ffcda17314cfeb698a667567ea63f63362dffbb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 21:45:24 -0200 Subject: perf top: Introduce --hide_{user,kernel}_symbols Default continues to be showing all symbols. 'K' and 'U' can be used to toggle showing kernel and user symbols. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258415125-15019-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 3af9520..89b7f68 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -76,6 +76,9 @@ static int delay_secs = 2; static int zero = 0; static int dump_symtab = 0; +static bool hide_kernel_symbols = false; +static bool hide_user_symbols = false; + /* * Source */ @@ -104,6 +107,7 @@ struct sym_entry { unsigned long snap_count; double weight; int skip; + u8 origin; struct map *map; struct source_line *source; struct source_line *lines; @@ -430,6 +434,13 @@ static void print_sym_table(void) list_for_each_entry_safe_from(syme, n, &active_symbols, node) { syme->snap_count = syme->count[snap]; if (syme->snap_count != 0) { + if ((hide_user_symbols && + syme->origin == PERF_RECORD_MISC_USER) || + (hide_kernel_symbols && + syme->origin == PERF_RECORD_MISC_KERNEL)) { + list_remove_active_sym(syme); + continue; + } syme->weight = sym_weight(syme); rb_insert_active_sym(&tmp, syme); sum_ksamples += syme->snap_count; @@ -637,6 +648,12 @@ static void print_mapped_keys(void) if (nr_counters > 1) fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); + fprintf(stdout, + "\t[K] hide kernel_symbols symbols. \t(%s)\n", + hide_kernel_symbols ? "yes" : "no"); + fprintf(stdout, + "\t[U] hide user symbols. \t(%s)\n", + hide_user_symbols ? "yes" : "no"); fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); fprintf(stdout, "\t[qQ] quit.\n"); } @@ -650,6 +667,8 @@ static int key_mapped(int c) case 'z': case 'q': case 'Q': + case 'K': + case 'U': return 1; case 'E': case 'w': @@ -727,6 +746,9 @@ static void handle_keypress(int c) case 'F': prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); break; + case 'K': + hide_kernel_symbols = !hide_kernel_symbols; + break; case 'q': case 'Q': printf("exiting.\n"); @@ -746,6 +768,9 @@ static void handle_keypress(int c) pthread_mutex_unlock(&syme->source_lock); } break; + case 'U': + hide_user_symbols = !hide_user_symbols; + break; case 'w': display_weighted = ~display_weighted; break; @@ -857,11 +882,16 @@ static void event__process_sample(const event_t *self, int counter) struct map *map; struct sym_entry *syme; struct symbol *sym; + u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - switch (self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) { + switch (origin) { case PERF_RECORD_MISC_USER: { - struct thread *thread = threads__findnew(self->ip.pid); + struct thread *thread; + if (hide_user_symbols) + return; + + thread = threads__findnew(self->ip.pid); if (thread == NULL) return; @@ -885,6 +915,9 @@ static void event__process_sample(const event_t *self, int counter) return; /* Fall thru */ case PERF_RECORD_MISC_KERNEL: + if (hide_kernel_symbols) + return; + sym = kernel_maps__find_symbol(ip, &map); if (sym == NULL) return; @@ -897,6 +930,7 @@ static void event__process_sample(const event_t *self, int counter) if (!syme->skip) { syme->count[counter]++; + syme->origin = origin; record_precise_ip(syme, counter, ip); pthread_mutex_lock(&active_symbols_lock); if (list_empty(&syme->node) || !syme->node.next) @@ -1178,6 +1212,8 @@ static const struct option options[] = { OPT_INTEGER('C', "CPU", &profile_cpu, "CPU to profile on"), OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), + OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, + "hide kernel symbols"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), OPT_INTEGER('r', "realtime", &realtime_prio, @@ -1200,6 +1236,8 @@ static const struct option options[] = { "profile at this frequency"), OPT_INTEGER('E', "entries", &print_entries, "display this many functions"), + OPT_BOOLEAN('U', "hide_user_symbols", &hide_user_symbols, + "hide user symbols"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_END() -- cgit v0.10.2 From 1124ba73be6a758965340bd997593b2996649d60 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 Nov 2009 21:45:25 -0200 Subject: perf buildid-list: Always show the DSO name Porcelain can ignore it, humans can make more sense of it. Suggested-by: Frederic Weisbecker Suggested-by: Ingo Molnar Suggested-by: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258415125-15019-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt index abab34e..01b642c 100644 --- a/tools/perf/Documentation/perf-buildid-list.txt +++ b/tools/perf/Documentation/perf-buildid-list.txt @@ -26,7 +26,7 @@ OPTIONS Don't do ownership validation. -v:: --verbose:: - Be more verbose, showing the name of the DSOs after the buildids. + Be more verbose. SEE ALSO -------- diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 2e377e1..7dee9d19 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -28,7 +28,7 @@ static const struct option options[] = { "input file name"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_BOOLEAN('v', "verbose", &verbose, - "be more verbose (show counter open errors, etc)"), + "be more verbose"), OPT_END() }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 53de9c4..1b77e81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1442,10 +1442,7 @@ size_t dsos__fprintf_buildid(FILE *fp) list_for_each_entry(pos, &dsos, node) { ret += dso__fprintf_buildid(pos, fp); - if (verbose) - ret += fprintf(fp, " %s\n", pos->long_name); - else - ret += fprintf(fp, "\n"); + ret += fprintf(fp, " %s\n", pos->long_name); } return ret; } -- cgit v0.10.2 From 11deb1f9f6ca6318fa9470e024b9f0634df48b4c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 01:18:09 -0200 Subject: perf tools: Don't die() in perf_header__add_attr() Propagate the errors instead, the users are the ones to decide what to do if a library call fails. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258427892-16312-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4c03bb7..5411be4 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -221,7 +221,10 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n } else { h_attr = perf_header_attr__new(a); if (h_attr != NULL) - perf_header__add_attr(header, h_attr); + if (perf_header__add_attr(header, h_attr) < 0) { + perf_header_attr__delete(h_attr); + h_attr = NULL; + } } return h_attr; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2f07a23..23ccdda 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -33,6 +33,12 @@ struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr) return self; } +void perf_header_attr__delete(struct perf_header_attr *self) +{ + free(self->id); + free(self); +} + void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) { int pos = self->ids; @@ -66,22 +72,28 @@ struct perf_header *perf_header__new(void) return self; } -void perf_header__add_attr(struct perf_header *self, - struct perf_header_attr *attr) +int perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr) { int pos = self->attrs; if (self->frozen) - die("frozen"); + return -1; self->attrs++; if (self->attrs > self->size) { - self->size *= 2; - self->attr = realloc(self->attr, self->size * sizeof(void *)); - if (!self->attr) - die("nomem"); + int nsize = self->size * 2; + struct perf_header_attr **nattr; + + nattr = realloc(self->attr, nsize * sizeof(void *)); + if (nattr == NULL) + return -1; + + self->size = nsize; + self->attr = nattr; } self->attr[pos] = attr; + return 0; } #define MAX_EVENT_NAME 64 @@ -434,7 +446,9 @@ struct perf_header *perf_header__read(int fd) perf_header_attr__add_id(attr, f_id); } - perf_header__add_attr(self, attr); + if (perf_header__add_attr(self, attr) < 0) + die("nomem"); + lseek(fd, tmp, SEEK_SET); } diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 0cbd4c9..b0d5cd7 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -58,13 +58,15 @@ struct perf_header { struct perf_header *perf_header__read(int fd); void perf_header__write(struct perf_header *self, int fd, bool at_exit); -void perf_header__add_attr(struct perf_header *self, - struct perf_header_attr *attr); +int perf_header__add_attr(struct perf_header *self, + struct perf_header_attr *attr); void perf_header__push_event(u64 id, const char *name); char *perf_header__find_event(u64 id); struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); +void perf_header_attr__delete(struct perf_header_attr *self); + void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); -- cgit v0.10.2 From 5875412152ce67fb5087157b86ab6597f91d23e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 01:18:10 -0200 Subject: perf tools: Don't die() in perf_header_attr__add_id() Propagate the errors instead, the users are the ones to decide what to do if a library call fails. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258427892-16312-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 5411be4..2a85205 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -327,7 +327,10 @@ try_again: exit(-1); } - perf_header_attr__add_id(h_attr, read_data.id); + if (perf_header_attr__add_id(h_attr, read_data.id) < 0) { + pr_warning("Not enough memory to add id\n"); + exit(-1); + } assert(fd[nr_cpu][counter] >= 0); fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 23ccdda..dee1ed2f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -39,18 +39,23 @@ void perf_header_attr__delete(struct perf_header_attr *self) free(self); } -void perf_header_attr__add_id(struct perf_header_attr *self, u64 id) +int perf_header_attr__add_id(struct perf_header_attr *self, u64 id) { int pos = self->ids; self->ids++; if (self->ids > self->size) { - self->size *= 2; - self->id = realloc(self->id, self->size * sizeof(u64)); - if (!self->id) - die("nomem"); + int nsize = self->size * 2; + u64 *nid = realloc(self->id, nsize * sizeof(u64)); + + if (nid == NULL) + return -1; + + self->size = nsize; + self->id = nid; } self->id[pos] = id; + return 0; } /* @@ -444,7 +449,8 @@ struct perf_header *perf_header__read(int fd) for (j = 0; j < nr_ids; j++) { do_read(fd, &f_id, sizeof(f_id)); - perf_header_attr__add_id(attr, f_id); + if (perf_header_attr__add_id(attr, f_id) < 0) + die("nomem"); } if (perf_header__add_attr(self, attr) < 0) die("nomem"); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index b0d5cd7..f46a94e 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -67,7 +67,7 @@ char *perf_header__find_event(u64 id); struct perf_header_attr *perf_header_attr__new(struct perf_event_attr *attr); void perf_header_attr__delete(struct perf_header_attr *self); -void perf_header_attr__add_id(struct perf_header_attr *self, u64 id); +int perf_header_attr__add_id(struct perf_header_attr *self, u64 id); u64 perf_header__sample_type(struct perf_header *header); struct perf_event_attr * -- cgit v0.10.2 From a9a70bbce7ab0bf3b1cba3ac662c4d502da6305c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 01:18:11 -0200 Subject: perf tools: Don't die() in perf_header__new() Propagate the errors instead, the users are the ones to decide what to do if a library call fails. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258427892-16312-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 2a85205..82260c5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -439,6 +439,11 @@ static int __cmd_record(int argc, const char **argv) else header = perf_header__new(); + if (header == NULL) { + pr_err("Not enough memory for reading perf file header\n"); + return -1; + } + if (raw_samples) { perf_header__set_feat(header, HEADER_TRACE_INFO); } else { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index dee1ed2f..726a0eb 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -65,14 +65,15 @@ struct perf_header *perf_header__new(void) { struct perf_header *self = calloc(sizeof(*self), 1); - if (!self) - die("nomem"); - - self->size = 1; - self->attr = malloc(sizeof(void *)); + if (self != NULL) { + self->size = 1; + self->attr = malloc(sizeof(void *)); - if (!self->attr) - die("nomem"); + if (self->attr == NULL) { + free(self); + self = NULL; + } + } return self; } @@ -426,6 +427,9 @@ struct perf_header *perf_header__read(int fd) u64 f_id; int nr_attrs, nr_ids, i, j; + if (self == NULL) + die("nomem"); + if (perf_file_header__read(&f_header, self, fd) < 0) die("incompatible file format"); -- cgit v0.10.2 From 3726cc75e581c157202da93bb2333cce25c15c98 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 01:18:12 -0200 Subject: perf tools: Don't die() in do_write() Propagate the errors instead, the users are the ones to decide what to do if a library call fails. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <1258427892-16312-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 726a0eb..b01a953 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -161,31 +161,36 @@ bool perf_header__has_feat(const struct perf_header *self, int feat) return test_bit(feat, self->adds_features); } -static void do_write(int fd, void *buf, size_t size) +static int do_write(int fd, const void *buf, size_t size) { while (size) { int ret = write(fd, buf, size); if (ret < 0) - die("failed to write"); + return -1; size -= ret; buf += ret; } + + return 0; } -static void write_buildid_table(int fd, struct list_head *id_head) +static int write_buildid_table(int fd, struct list_head *id_head) { struct build_id_list *iter, *next; list_for_each_entry_safe(iter, next, id_head, list) { struct build_id_event *b = &iter->event; - do_write(fd, b, sizeof(*b)); - do_write(fd, (void *)iter->dso_name, iter->len); + if (do_write(fd, b, sizeof(*b)) < 0 || + do_write(fd, iter->dso_name, iter->len) < 0) + return -1; list_del(&iter->list); free(iter); } + + return 0; } static void @@ -233,12 +238,14 @@ perf_header__adds_write(struct perf_header *self, int fd) /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); - write_buildid_table(fd, &id_list); + if (write_buildid_table(fd, &id_list) < 0) + die("failed to write buildid table"); buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; } lseek(fd, sec_start, SEEK_SET); - do_write(fd, feat_sec, sec_size); + if (do_write(fd, feat_sec, sec_size) < 0) + die("failed to write feature section"); free(feat_sec); } @@ -256,7 +263,8 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) attr = self->attr[i]; attr->id_offset = lseek(fd, 0, SEEK_CUR); - do_write(fd, attr->id, attr->ids * sizeof(u64)); + if (do_write(fd, attr->id, attr->ids * sizeof(u64)) < 0) + die("failed to write perf header"); } @@ -272,13 +280,15 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) .size = attr->ids * sizeof(u64), } }; - do_write(fd, &f_attr, sizeof(f_attr)); + if (do_write(fd, &f_attr, sizeof(f_attr)) < 0) + die("failed to write perf header attribute"); } self->event_offset = lseek(fd, 0, SEEK_CUR); self->event_size = event_count * sizeof(struct perf_trace_event_type); if (events) - do_write(fd, events, self->event_size); + if (do_write(fd, events, self->event_size) < 0) + die("failed to write perf header events"); self->data_offset = lseek(fd, 0, SEEK_CUR); @@ -306,7 +316,8 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); lseek(fd, 0, SEEK_SET); - do_write(fd, &f_header, sizeof(f_header)); + if (do_write(fd, &f_header, sizeof(f_header)) < 0) + die("failed to write perf header"); lseek(fd, self->data_offset + self->data_size, SEEK_SET); self->frozen = 1; -- cgit v0.10.2 From 42109197eb7c01080eea6d9cd48ca23cbc3c566c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 15 Nov 2009 21:19:52 +0900 Subject: x86: gart: Add own dma_mapping_error function GART IOMMU is the only user of bad_dma_address variable. This patch converts GART to use the newer mechanism, fill in ->mapping_error() in struct dma_map_ops, to make dma_mapping_error() work in IOMMU specific way. Signed-off-by: FUJITA Tomonori Acked-by: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 919182e..61c4d1e 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -47,6 +47,8 @@ static unsigned long iommu_pages; /* .. and in pages */ static u32 *iommu_gatt_base; /* Remapping table */ +static dma_addr_t bad_dma_addr; + /* * If this is disabled the IOMMU will use an optimized flushing strategy * of only flushing when an mapping is reused. With it true the GART is @@ -217,7 +219,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, if (panic_on_overflow) panic("dma_map_area overflow %lu bytes\n", size); iommu_full(dev, size, dir); - return bad_dma_address; + return bad_dma_addr; } for (i = 0; i < npages; i++) { @@ -303,7 +305,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, if (nonforced_iommu(dev, addr, s->length)) { addr = dma_map_area(dev, addr, s->length, dir, 0); - if (addr == bad_dma_address) { + if (addr == bad_dma_addr) { if (i > 0) gart_unmap_sg(dev, sg, i, dir, NULL); nents = 0; @@ -456,7 +458,7 @@ error: iommu_full(dev, pages << PAGE_SHIFT, dir); for_each_sg(sg, s, nents, i) - s->dma_address = bad_dma_address; + s->dma_address = bad_dma_addr; return 0; } @@ -480,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, DMA_BIDIRECTIONAL, align_mask); flush_gart(); - if (paddr != bad_dma_address) { + if (paddr != bad_dma_addr) { *dma_addr = paddr; return page_address(page); } @@ -500,6 +502,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return (dma_addr == bad_dma_addr); +} + static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) @@ -687,6 +694,7 @@ static struct dma_map_ops gart_dma_ops = { .unmap_page = gart_unmap_page, .alloc_coherent = gart_alloc_coherent, .free_coherent = gart_free_coherent, + .mapping_error = gart_mapping_error, }; static void gart_iommu_shutdown(void) @@ -785,7 +793,7 @@ int __init gart_iommu_init(void) iommu_start = aper_size - iommu_size; iommu_bus_base = info.aper_base + iommu_start; - bad_dma_address = iommu_bus_base; + bad_dma_addr = iommu_bus_base; iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); /* -- cgit v0.10.2 From 8fd524b355daef0945692227e726fb444cebcd4f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 15 Nov 2009 21:19:53 +0900 Subject: x86: Kill bad_dma_address variable This kills bad_dma_address variable, the old mechanism to enable IOMMU drivers to make dma_mapping_error() work in IOMMU's specific way. bad_dma_address variable was introduced to enable IOMMU drivers to make dma_mapping_error() work in IOMMU's specific way. However, it can't handle systems that use both swiotlb and HW IOMMU. SO we introduced dma_map_ops->mapping_error to solve that case. Intel VT-d, GART, and swiotlb already use dma_map_ops->mapping_error. Calgary, AMD IOMMU, and nommu use zero for an error dma address. This adds DMA_ERROR_CODE and converts them to use it (as SPARC and POWER does). Signed-off-by: FUJITA Tomonori Acked-by: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-3-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6a25d5d..0f6c02f 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -20,7 +20,8 @@ # define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) #endif -extern dma_addr_t bad_dma_address; +#define DMA_ERROR_CODE 0 + extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; @@ -48,7 +49,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); + return (dma_addr == DMA_ERROR_CODE); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 66237fd..093bd52 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -928,7 +928,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, } if (unlikely(address == -1)) - address = bad_dma_address; + address = DMA_ERROR_CODE; WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); @@ -1544,7 +1544,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, pte = dma_ops_get_pte(dom, address); if (!pte) - return bad_dma_address; + return DMA_ERROR_CODE; __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; @@ -1625,7 +1625,7 @@ static dma_addr_t __map_single(struct device *dev, retry: address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, dma_mask); - if (unlikely(address == bad_dma_address)) { + if (unlikely(address == DMA_ERROR_CODE)) { /* * setting next_address here will let the address * allocator only scan the new allocated range in the @@ -1646,7 +1646,7 @@ retry: start = address; for (i = 0; i < pages; ++i) { ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); - if (ret == bad_dma_address) + if (ret == DMA_ERROR_CODE) goto out_unmap; paddr += PAGE_SIZE; @@ -1674,7 +1674,7 @@ out_unmap: dma_ops_free_addresses(dma_dom, address, pages); - return bad_dma_address; + return DMA_ERROR_CODE; } /* @@ -1690,7 +1690,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == bad_dma_address) || + if ((dma_addr == DMA_ERROR_CODE) || (dma_addr + size > dma_dom->aperture_size)) return; @@ -1732,7 +1732,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, INC_STATS_COUNTER(cnt_map_single); if (!check_device(dev)) - return bad_dma_address; + return DMA_ERROR_CODE; dma_mask = *dev->dma_mask; @@ -1743,12 +1743,12 @@ static dma_addr_t map_page(struct device *dev, struct page *page, return (dma_addr_t)paddr; if (!dma_ops_domain(domain)) - return bad_dma_address; + return DMA_ERROR_CODE; spin_lock_irqsave(&domain->lock, flags); addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, dma_mask); - if (addr == bad_dma_address) + if (addr == DMA_ERROR_CODE) goto out; iommu_completion_wait(iommu); @@ -1957,7 +1957,7 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, iommu, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) { + if (*dma_addr == DMA_ERROR_CODE) { spin_unlock_irqrestore(&domain->lock, flags); goto out_free; } @@ -2110,7 +2110,6 @@ int __init amd_iommu_init_dma_ops(void) prealloc_protection_domains(); iommu_detected = 1; - bad_dma_address = 0; swiotlb = 0; #ifdef CONFIG_GART_IOMMU gart_iommu_aperture_disabled = 1; diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index c84ad03..af9f436 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -245,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev, if (panic_on_overflow) panic("Calgary: fix the allocator.\n"); else - return bad_dma_address; + return DMA_ERROR_CODE; } } @@ -261,11 +261,11 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *vaddr, unsigned int npages, int direction) { unsigned long entry; - dma_addr_t ret = bad_dma_address; + dma_addr_t ret = DMA_ERROR_CODE; entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == bad_dma_address)) + if (unlikely(entry == DMA_ERROR_CODE)) goto error; /* set the return dma address */ @@ -280,7 +280,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, error: printk(KERN_WARNING "Calgary: failed to allocate %u pages in " "iommu %p\n", npages, tbl); - return bad_dma_address; + return DMA_ERROR_CODE; } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, @@ -291,8 +291,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long flags; /* were we called with bad_dma_address? */ - badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { + badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); + if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); return; @@ -374,7 +374,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); entry = iommu_range_alloc(dev, tbl, npages); - if (entry == bad_dma_address) { + if (entry == DMA_ERROR_CODE) { /* makes sure unmap knows to stop */ s->dma_length = 0; goto error; @@ -392,7 +392,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, error: calgary_unmap_sg(dev, sg, nelems, dir, NULL); for_each_sg(sg, s, nelems, i) { - sg->dma_address = bad_dma_address; + sg->dma_address = DMA_ERROR_CODE; sg->dma_length = 0; } return 0; @@ -447,7 +447,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, /* set up tces to cover the allocated range */ mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); - if (mapping == bad_dma_address) + if (mapping == DMA_ERROR_CODE) goto free; *dma_handle = mapping; return ret; @@ -728,7 +728,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) struct iommu_table *tbl = pci_iommu(dev->bus); /* reserve EMERGENCY_PAGES from bad_dma_address and up */ - iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); + iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); /* avoid the BIOS/VGA first 640KB-1MB region */ /* for CalIOC2 - avoid the entire first MB */ @@ -1359,8 +1359,6 @@ static int __init calgary_iommu_init(void) return ret; } - bad_dma_address = 0x0; - return 0; } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index bf621b9..afcc58b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -43,9 +43,6 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; -dma_addr_t bad_dma_address __read_mostly = 0; -EXPORT_SYMBOL(bad_dma_address); - /* Dummy device used for NULL arguments (normally ISA). */ struct device x86_dma_fallback_dev = { .init_name = "fallback device", diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 875e382..22be12b 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -33,7 +33,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, dma_addr_t bus = page_to_phys(page) + offset; WARN_ON(size == 0); if (!check_addr("map_single", dev, bus, size)) - return bad_dma_address; + return DMA_ERROR_CODE; flush_write_buffers(); return bus; } -- cgit v0.10.2 From 1f7564ca831a00b21bb493ef174c845b2ba9e64d Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 15 Nov 2009 21:19:54 +0900 Subject: x86: Calgary: Remove unnecessary DMA_ERROR_CODE usage This cleans up iommu_alloc() a bit and removes unnecessary DMA_ERROR_CODE usage. Signed-off-by: FUJITA Tomonori Acked-by: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-4-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index af9f436..849a099 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -261,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, void *vaddr, unsigned int npages, int direction) { unsigned long entry; - dma_addr_t ret = DMA_ERROR_CODE; + dma_addr_t ret; entry = iommu_range_alloc(dev, tbl, npages); - if (unlikely(entry == DMA_ERROR_CODE)) - goto error; + if (unlikely(entry == DMA_ERROR_CODE)) { + printk(KERN_WARNING "Calgary: failed to allocate %u pages in " + "iommu %p\n", npages, tbl); + return DMA_ERROR_CODE; + } /* set the return dma address */ ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); @@ -274,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, /* put the TCEs in the HW table */ tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, direction); - return ret; - -error: - printk(KERN_WARNING "Calgary: failed to allocate %u pages in " - "iommu %p\n", npages, tbl); - return DMA_ERROR_CODE; } static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, -- cgit v0.10.2 From 123bf0e2eddcda36a33bdfc87aa1fb07229f07b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 15 Nov 2009 21:19:52 +0900 Subject: x86: gart: Clean up the code a bit Clean up various small stylistic details in the GART code. No functionality changed. Cc: FUJITA Tomonori Cc: Jesse Barnes Cc: muli@il.ibm.com Cc: joerg.roedel@amd.com LKML-Reference: <1258287594-8777-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 61c4d1e..e6a0d40 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -95,7 +95,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); @@ -297,7 +297,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, int i; #ifdef CONFIG_IOMMU_DEBUG - printk(KERN_DEBUG "dma_map_sg overflow\n"); + pr_debug("dma_map_sg overflow\n"); #endif for_each_sg(sg, s, nents, i) { @@ -392,12 +392,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (!dev) dev = &x86_dma_fallback_dev; - out = 0; - start = 0; - start_sg = sgmap = sg; - seg_size = 0; - max_seg_size = dma_get_max_seg_size(dev); - ps = NULL; /* shut up gcc */ + out = 0; + start = 0; + start_sg = sg; + sgmap = sg; + seg_size = 0; + max_seg_size = dma_get_max_seg_size(dev); + ps = NULL; /* shut up gcc */ + for_each_sg(sg, s, nents, i) { dma_addr_t addr = sg_phys(s); @@ -420,11 +422,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, sgmap, pages, need) < 0) goto error; out++; - seg_size = 0; - sgmap = sg_next(sgmap); - pages = 0; - start = i; - start_sg = s; + + seg_size = 0; + sgmap = sg_next(sgmap); + pages = 0; + start = i; + start_sg = s; } } @@ -523,7 +526,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; if (iommu_size < 64*1024*1024) { - printk(KERN_WARNING + pr_warning( "PCI-DMA: Warning: Small IOMMU %luMB." " Consider increasing the AGP aperture in BIOS\n", iommu_size >> 20); @@ -578,28 +581,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc) aperture_alloc = aper_alloc; } -static int gart_resume(struct sys_device *dev) +static void gart_fixup_northbridges(struct sys_device *dev) { - printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); + int i; - if (fix_up_north_bridges) { - int i; + if (!fix_up_north_bridges) + return; - printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); + pr_info("PCI-DMA: Restoring GART aperture settings\n"); - for (i = 0; i < num_k8_northbridges; i++) { - struct pci_dev *dev = k8_northbridges[i]; + for (i = 0; i < num_k8_northbridges; i++) { + struct pci_dev *dev = k8_northbridges[i]; - /* - * Don't enable translations just yet. That is the next - * step. Restore the pre-suspend aperture settings. - */ - pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, - aperture_order << 1); - pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, - aperture_alloc >> 25); - } + /* + * Don't enable translations just yet. That is the next + * step. Restore the pre-suspend aperture settings. + */ + pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); + pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); } +} + +static int gart_resume(struct sys_device *dev) +{ + pr_info("PCI-DMA: Resuming GART IOMMU\n"); + + gart_fixup_northbridges(dev); enable_gart_translations(); @@ -612,15 +619,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class gart_sysdev_class = { - .name = "gart", - .suspend = gart_suspend, - .resume = gart_resume, + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, }; static struct sys_device device_gart = { - .id = 0, - .cls = &gart_sysdev_class, + .cls = &gart_sysdev_class, }; /* @@ -635,7 +641,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) void *gatt; int i, error; - printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); + pr_info("PCI-DMA: Disabling AGP.\n"); + aper_size = aper_base = info->aper_size = 0; dev = NULL; for (i = 0; i < num_k8_northbridges; i++) { @@ -653,6 +660,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) } if (!aper_base) goto nommu; + info->aper_base = aper_base; info->aper_size = aper_size >> 20; @@ -675,14 +683,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) flush_gart(); - printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", + pr_info("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); return 0; nommu: /* Should not happen anymore */ - printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" + pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" "falling back to iommu=soft.\n"); return -1; } @@ -744,23 +752,23 @@ int __init gart_iommu_init(void) !gart_iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { if (max_pfn > MAX_DMA32_PFN) { - printk(KERN_WARNING "More than 4GB of memory " - "but GART IOMMU not available.\n"); - printk(KERN_WARNING "falling back to iommu=soft.\n"); + pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); + pr_warning("falling back to iommu=soft.\n"); } return 0; } /* need to map that range */ - aper_size = info.aper_size << 20; - aper_base = info.aper_base; - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + aper_size = info.aper_size << 20; + aper_base = info.aper_base; + end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { start_pfn = (aper_base>>PAGE_SHIFT); init_memory_mapping(start_pfn<> PAGE_SHIFT; @@ -775,8 +783,7 @@ int __init gart_iommu_init(void) ret = dma_debug_resize_entries(iommu_pages); if (ret) - printk(KERN_DEBUG - "PCI-DMA: Cannot trace all the entries\n"); + pr_debug("PCI-DMA: Cannot trace all the entries\n"); } #endif @@ -786,15 +793,14 @@ int __init gart_iommu_init(void) */ iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); - agp_memory_reserved = iommu_size; - printk(KERN_INFO - "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", + pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", iommu_size >> 20); - iommu_start = aper_size - iommu_size; - iommu_bus_base = info.aper_base + iommu_start; - bad_dma_addr = iommu_bus_base; - iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); + agp_memory_reserved = iommu_size; + iommu_start = aper_size - iommu_size; + iommu_bus_base = info.aper_base + iommu_start; + bad_dma_addr = iommu_bus_base; + iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); /* * Unmap the IOMMU part of the GART. The alias of the page is @@ -816,7 +822,7 @@ int __init gart_iommu_init(void) * the pages as Not-Present: */ wbinvd(); - + /* * Now all caches are flushed and we can safely enable * GART hardware. Doing it early leaves the possibility -- cgit v0.10.2 From 751386507701010831d72c522171753d2cd903d2 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 29 Oct 2009 17:20:02 +0200 Subject: perf tools: Support static build This makes it possible to build perf statically, by performing: make LDFLAGS=-static Since static libraries are only searched in the order they are specified, move library list from LDFLAGS to EXTLIBS, so that they are put at the end of linker command line. Signed-off-by: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091029152002.GA5406@redhat.com> [ v2: resolved conflicts ] Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3dbb5c5..5d1a8b0 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -145,6 +145,8 @@ all:: # Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call # your external grep (e.g., if your system lacks grep, if its grep is # broken, or spawning external process is slower than built-in grep perf has). +# +# Define LDFLAGS=-static to build a static binary. PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN @@ -208,7 +210,7 @@ ifndef PERF_DEBUG endif CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) -LDFLAGS = -lpthread -lrt -lelf -lm +EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -470,19 +472,19 @@ ifeq ($(uname_S),Darwin) PTHREAD_LIBS = endif -ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) -ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) +ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) +ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); endif - ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) + ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) BASIC_CFLAGS += -DLIBELF_NO_MMAP endif else msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); endif -ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y) +ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); BASIC_CFLAGS += -DNO_LIBDWARF else @@ -494,20 +496,20 @@ endif ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else - has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") + has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd > /dev/null 2>&1 && echo y") ifeq ($(has_bfd),y) EXTLIBS += -lbfd else - has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y") + has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty > /dev/null 2>&1 && echo y") ifeq ($(has_bfd_iberty),y) EXTLIBS += -lbfd -liberty else - has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") + has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") ifeq ($(has_bfd_iberty_z),y) EXTLIBS += -lbfd -liberty -lz else - has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y") + has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty > /dev/null 2>&1 && echo y") ifeq ($(has_cplus_demangle),y) EXTLIBS += -liberty BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE -- cgit v0.10.2 From 5a50e33cc916f6a81cb96f0f24f6a88c9ab78b79 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Nov 2009 08:43:01 -0500 Subject: ring-buffer: Move access to commit_page up into function used With the change of the way we process commits. Where a commit only happens at the outer most level, and that we don't need to worry about a commit ending after the rb_start_commit() has been called, the code use to grab the commit page before the tail page to prevent a possible race. But this race no longer exists with the rb_start_commit() rb_end_commit() interface. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3ffa502..4b8293f 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1785,9 +1785,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, static struct ring_buffer_event * rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, unsigned long length, unsigned long tail, - struct buffer_page *commit_page, struct buffer_page *tail_page, u64 *ts) { + struct buffer_page *commit_page = cpu_buffer->commit_page; struct ring_buffer *buffer = cpu_buffer->buffer; struct buffer_page *next_page; int ret; @@ -1890,13 +1890,10 @@ static struct ring_buffer_event * __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, unsigned type, unsigned long length, u64 *ts) { - struct buffer_page *tail_page, *commit_page; + struct buffer_page *tail_page; struct ring_buffer_event *event; unsigned long tail, write; - commit_page = cpu_buffer->commit_page; - /* we just need to protect against interrupts */ - barrier(); tail_page = cpu_buffer->tail_page; write = local_add_return(length, &tail_page->write); @@ -1907,7 +1904,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, /* See if we shot pass the end of this buffer page */ if (write > BUF_PAGE_SIZE) return rb_move_tail(cpu_buffer, length, tail, - commit_page, tail_page, ts); + tail_page, ts); /* We reserved something on the buffer */ -- cgit v0.10.2 From c13d2f7c3231e873f30db92b96c8caa48f100f33 Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Mon, 16 Nov 2009 20:56:13 +0100 Subject: tracing: Fix trace_marker output When a string was written to /tracing/trace_marker, some strange characters appeared in the trace output instead of the string, since a vprint function erroneously called a vararg print function with a va_list argument. This patch fixes the problem and simplifies the related code. Signed-off-by: Carsten Emde LKML-Reference: <4B01AE5D.1010801@osadl.org> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 03c7fd5..12b49ca 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1361,10 +1361,11 @@ int trace_array_vprintk(struct trace_array *tr, pause_graph_tracing(); raw_local_irq_save(irq_flags); __raw_spin_lock(&trace_buf_lock); - len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); - - len = min(len, TRACE_BUF_SIZE-1); - trace_buf[len] = 0; + if (args == NULL) { + strncpy(trace_buf, fmt, TRACE_BUF_SIZE); + len = strlen(trace_buf); + } else + len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); size = sizeof(*entry) + len + 1; buffer = tr->buffer; @@ -1373,10 +1374,10 @@ int trace_array_vprintk(struct trace_array *tr, if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - entry->ip = ip; + entry->ip = ip; memcpy(&entry->buf, trace_buf, len); - entry->buf[len] = 0; + entry->buf[len] = '\0'; if (!filter_check_discard(call, entry, buffer, event)) ring_buffer_unlock_commit(buffer, event); @@ -3319,22 +3320,11 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, return cnt; } -static int mark_printk(const char *fmt, ...) -{ - int ret; - va_list args; - va_start(args, fmt); - ret = trace_vprintk(0, fmt, args); - va_end(args); - return ret; -} - static ssize_t tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { char *buf; - char *end; if (tracing_disabled) return -EINVAL; @@ -3342,7 +3332,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt > TRACE_BUF_SIZE) cnt = TRACE_BUF_SIZE; - buf = kmalloc(cnt + 1, GFP_KERNEL); + buf = kmalloc(cnt + 2, GFP_KERNEL); if (buf == NULL) return -ENOMEM; @@ -3350,14 +3340,13 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, kfree(buf); return -EFAULT; } + if (buf[cnt-1] != '\n') { + buf[cnt] = '\n'; + buf[cnt+1] = '\0'; + } else + buf[cnt] = '\0'; - /* Cut from the first nil or newline. */ - buf[cnt] = '\0'; - end = strchr(buf, '\n'); - if (end) - *end = '\0'; - - cnt = mark_printk("%s\n", buf); + cnt = trace_vprintk(0, buf, NULL); kfree(buf); *fpos += cnt; -- cgit v0.10.2 From d62d77fd18cc82e839e49b7ef3360e4411f7d2e5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 17 Nov 2009 12:29:38 +0100 Subject: perf annotate: Allocate history size correctly Symbol offset history table size does not get updated properly when it is being resized. This leads to garbage results in perf annotate. Signed-off-by: Nick Piggin Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 226f44a..cbac575 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -227,6 +227,10 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int v) *curr = rb_entry(nd, struct symbol, rb_node); prev->end = curr->start - 1; + if (prev->hist) { + free(prev->hist); + prev->hist = calloc(sizeof(u64), prev->end - prev->start); + } prevnd = nd; } @@ -883,6 +887,10 @@ static inline void dso__fill_symbol_holes(struct dso *self) pos->end = prev->end; else if (hole) pos->end = prev->start - 1; + if (pos->hist) { + free(pos->hist); + pos->hist = calloc(sizeof(u64), pos->end - pos->start); + } } } prev = pos; -- cgit v0.10.2 From f6060f46819f313d34a8c8151390cda509c23389 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 5 Nov 2009 11:16:17 +0800 Subject: tracing: Prevent build warning: 'ftrace_graph_buf' defined but not used Prevent build warning when CONFIG_FUNCTION_GRAPH_TRACER is not set. Signed-off-by: Lai Jiangshan LKML-Reference: <4AF24381.5060307@cn.fujitsu.com> Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1ed514f..7f9b51e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2274,7 +2274,6 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset) #define FTRACE_FILTER_SIZE COMMAND_LINE_SIZE static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata; static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata; -static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; static int __init set_ftrace_notrace(char *str) { @@ -2291,6 +2290,7 @@ static int __init set_ftrace_filter(char *str) __setup("ftrace_filter=", set_ftrace_filter); #ifdef CONFIG_FUNCTION_GRAPH_TRACER +static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; static int __init set_graph_function(char *str) { strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); -- cgit v0.10.2 From 638adb0561264a3360a53e93def62288c85d8373 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 17 Nov 2009 10:48:25 -0500 Subject: tracing: Only print objcopy version warning once from recordmcount If the user has an older version of objcopy, that can not handle converting local symbols to global and vice versa, then some functions will not be part of the dynamic function tracer. The current code in recordmcount.pl will print a warning in this case. Unfortunately, there exists lots of files that may have this issue with older objcopys and this will cause a warning for every file compiled with this issue. This patch solves this overwhelming output by creating a .tmp_quiet_recordmcount file on the first instance the warning is encountered. The warning will not print if this file exists. The temp file is deleted at the beginning of the compile to ensure that the warning will happen once again on new compiles (because the issue is still present). Reported-by: Andrew Morton Cc: Sam Ravnborg Signed-off-by: Steven Rostedt diff --git a/Makefile b/Makefile index 9425d1d..1c949ac 100644 --- a/Makefile +++ b/Makefile @@ -379,6 +379,7 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exc PHONY += scripts_basic scripts_basic: $(Q)$(MAKE) $(build)=scripts/basic + $(Q)rm -f .tmp_quiet_recordmcount # To avoid any implicit rule to kick in, define an empty command. scripts/basic/%: scripts_basic ; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index a4e2435..f0d1445 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -162,6 +162,11 @@ my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command my $can_use_local = 0; # If we can use local function references +# Shut up recordmcount if user has older objcopy +my $quiet_recordmcount = ".tmp_quiet_recordmcount"; +my $print_warning = 1; +$print_warning = 0 if ( -f $quiet_recordmcount); + ## # check_objcopy - whether objcopy supports --globalize-symbols # @@ -179,10 +184,13 @@ sub check_objcopy } close (IN); - if (!$can_use_local) { + if (!$can_use_local && $print_warning) { print STDERR "WARNING: could not find objcopy version or version " . "is less than 2.17.\n" . - "\tLocal function references is disabled.\n"; + "\tLocal function references are disabled.\n"; + open (QUIET, ">$quiet_recordmcount"); + printf QUIET "Disables the warning from recordmcount.pl\n"; + close QUIET; } } -- cgit v0.10.2 From 6dbfe5a57db3564adf7b2a65068e40f1b4a0d2db Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 17 Nov 2009 18:27:18 +0100 Subject: x86: Fixup last users of irq_chip->typename The typename member of struct irq_chip was kept for migration purposes and is obsolete since more than 2 years. Fix up the leftovers. Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553..f084dfd 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -486,7 +486,7 @@ static void end_cobalt_irq(unsigned int irq) } static struct irq_chip cobalt_irq_type = { - .typename = "Cobalt-APIC", + .name = "Cobalt-APIC", .startup = startup_cobalt_irq, .shutdown = disable_cobalt_irq, .enable = enable_cobalt_irq, @@ -523,7 +523,7 @@ static void end_piix4_master_irq(unsigned int irq) } static struct irq_chip piix4_master_irq_type = { - .typename = "PIIX4-master", + .name = "PIIX4-master", .startup = startup_piix4_master_irq, .ack = ack_cobalt_irq, .end = end_piix4_master_irq, @@ -531,7 +531,7 @@ static struct irq_chip piix4_master_irq_type = { static struct irq_chip piix4_virtual_irq_type = { - .typename = "PIIX4-virtual", + .name = "PIIX4-virtual", .shutdown = disable_8259A_irq, .enable = enable_8259A_irq, .disable = disable_8259A_irq, -- cgit v0.10.2 From a1afb6371bb5341057056194d1168753f6d77242 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 28 Aug 2009 22:19:33 +0400 Subject: genirq: switch /proc/irq/*/spurious to seq_file [ tglx: compacted it a bit ] Signed-off-by: Alexey Dobriyan LKML-Reference: <20090828181743.GA14050@x200.localdomain> Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index dfef5b9..4e47f11 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -148,18 +148,28 @@ static const struct file_operations default_affinity_proc_fops = { }; #endif -static int irq_spurious_read(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int irq_spurious_proc_show(struct seq_file *m, void *v) { - struct irq_desc *desc = irq_to_desc((long) data); - return sprintf(page, "count %u\n" - "unhandled %u\n" - "last_unhandled %u ms\n", - desc->irq_count, - desc->irqs_unhandled, - jiffies_to_msecs(desc->last_unhandled)); + struct irq_desc *desc = irq_to_desc((long) m->private); + + seq_printf(m, "count %u\n" "unhandled %u\n" "last_unhandled %u ms\n", + desc->irq_count, desc->irqs_unhandled, + jiffies_to_msecs(desc->last_unhandled)); + return 0; +} + +static int irq_spurious_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, irq_spurious_proc_show, NULL); } +static const struct file_operations irq_spurious_proc_fops = { + .open = irq_spurious_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + #define MAX_NAMELEN 128 static int name_unique(unsigned int irq, struct irqaction *new_action) @@ -204,7 +214,6 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) void register_irq_proc(unsigned int irq, struct irq_desc *desc) { char name [MAX_NAMELEN]; - struct proc_dir_entry *entry; if (!root_irq_dir || (desc->chip == &no_irq_chip) || desc->dir) return; @@ -223,11 +232,8 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) &irq_affinity_proc_fops, (void *)(long)irq); #endif - entry = create_proc_entry("spurious", 0444, desc->dir); - if (entry) { - entry->data = (void *)(long)irq; - entry->read_proc = irq_spurious_read; - } + proc_create_data("spurious", 0444, desc->dir, + &irq_spurious_proc_fops, (void *)(long)irq); } #undef MAX_NAMELEN -- cgit v0.10.2 From 2ea6dec4a22a6f66f6633876212fd4d195cf8277 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Nov 2009 14:27:27 -0800 Subject: generic-ipi: Add smp_call_function_any() Andrew points out that acpi-cpufreq uses cpumask_any, when it really would prefer to use the same CPU if possible (to avoid an IPI). In general, this seems a good idea to offer. [ tglx: Documented selection preference and Inlined the UP case to avoid the copy of smp_call_function_single() and the extra EXPORT ] Signed-off-by: Rusty Russell Cc: Ingo Molnar Cc: Venkatesh Pallipadi Cc: Len Brown Cc: Zhao Yakui Cc: Dave Jones Cc: Thomas Gleixner Cc: Mike Galbraith Cc: "Zhang, Yanmin" Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner diff --git a/include/linux/smp.h b/include/linux/smp.h index 39c64ba..7a0570e 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -76,6 +76,9 @@ void smp_call_function_many(const struct cpumask *mask, void __smp_call_function_single(int cpuid, struct call_single_data *data, int wait); +int smp_call_function_any(const struct cpumask *mask, + void (*func)(void *info), void *info, int wait); + /* * Generic and arch helpers */ @@ -137,9 +140,15 @@ static inline void smp_send_reschedule(int cpu) { } #define smp_prepare_boot_cpu() do {} while (0) #define smp_call_function_many(mask, func, info, wait) \ (up_smp_call_function(func, info)) -static inline void init_call_single_data(void) +static inline void init_call_single_data(void) { } + +static inline int +smp_call_function_any(const struct cpumask *mask, void (*func)(void *info), + void *info, int wait) { + return smp_call_function_single(0, func, info, wait); } + #endif /* !SMP */ /* diff --git a/kernel/smp.c b/kernel/smp.c index 8bd618f..a8c7606 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -319,6 +319,51 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); +/* + * smp_call_function_any - Run a function on any of the given cpus + * @mask: The mask of cpus it can run on. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait until function has completed. + * + * Returns 0 on success, else a negative status code (if no cpus were online). + * Note that @wait will be implicitly turned on in case of allocation failures, + * since we fall back to on-stack allocation. + * + * Selection preference: + * 1) current cpu if in @mask + * 2) any cpu of current node if in @mask + * 3) any other online cpu in @mask + */ +int smp_call_function_any(const struct cpumask *mask, + void (*func)(void *info), void *info, int wait) +{ + unsigned int cpu; + const struct cpumask *nodemask; + int ret; + + /* Try for same CPU (cheapest) */ + cpu = get_cpu(); + if (cpumask_test_cpu(cpu, mask)) + goto call; + + /* Try for same node. */ + nodemask = cpumask_of_node(cpu); + for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids; + cpu = cpumask_next_and(cpu, nodemask, mask)) { + if (cpu_online(cpu)) + goto call; + } + + /* Any online will do: smp_call_function_single handles nr_cpu_ids. */ + cpu = cpumask_any_and(mask, cpu_online_mask); +call: + ret = smp_call_function_single(cpu, func, info, wait); + put_cpu(); + return ret; +} +EXPORT_SYMBOL_GPL(smp_call_function_any); + /** * __smp_call_function_single(): Run a function on another CPU * @cpu: The CPU to run on. -- cgit v0.10.2 From 821d35a56044e522e811f6a1e8632cc230360280 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 18 Nov 2009 14:39:51 +0000 Subject: selinux: Fix warnings scripts/selinux/genheaders/genheaders.c:20: warning: no previous prototype for ?usage? scripts/selinux/genheaders/genheaders.c:26: warning: no previous prototype for ?stoupperx? Signed-off-by: Alan Cox Acked-by: WANG Cong Signed-off-by: James Morris diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c index 3b16145..771b86f 100644 --- a/scripts/selinux/genheaders/genheaders.c +++ b/scripts/selinux/genheaders/genheaders.c @@ -17,13 +17,13 @@ struct security_class_mapping { const char *progname; -void usage(void) +static void usage(void) { printf("usage: %s flask.h av_permissions.h\n", progname); exit(1); } -char *stoupperx(const char *s) +static char *stoupperx(const char *s) { char *s2 = strdup(s); char *p; -- cgit v0.10.2 From 192dcf1d1775736627280a5dd4cb0f605b21857a Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Wed, 18 Nov 2009 13:06:55 -0800 Subject: tracing: Remove the stale include/trace/power.h Commit 6161352 moved the power tracing to include/trace/events/, but left the old header behind. No one is using the old header, and its declarations are now incorrect, so it should be removed. Signed-off-by: Josh Stone Acked-by: Arjan van de Ven Cc: Frank Ch. Eigler Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1258578415-14752-1-git-send-email-jistone@redhat.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/power.h b/include/trace/power.h deleted file mode 100644 index ef20466..0000000 --- a/include/trace/power.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _TRACE_POWER_H -#define _TRACE_POWER_H - -#include -#include - -enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, -}; - -struct power_trace { - ktime_t stamp; - ktime_t end; - int type; - int state; -}; - -DECLARE_TRACE(power_start, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_mark, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); - -DECLARE_TRACE(power_end, - TP_PROTO(struct power_trace *it), - TP_ARGS(it)); - -#endif /* _TRACE_POWER_H */ -- cgit v0.10.2 From 11ada26c78febe4662a8e848f3bff74e3200c920 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 17 Nov 2009 09:05:56 -0800 Subject: perf tools: Add ia64 support for tools/perf/ Compiler on ia64 rejects the "-m64" option. Add arch specific pieces to perf.h Signed-off-by: Tony Luck Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4b02d7f43514327a@agluck-desktop.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 5d1a8b0..3f0666a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -166,10 +166,12 @@ ifdef NO_64BIT MBITS := -m32 else # - # If we're on a 64-bit kernel, use -m64: + # If we're on a 64-bit kernel (except ia64), use -m64: # - ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) - MBITS := -m64 + ifneq ($(uname_M),ia64) + ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) + MBITS := -m64 + endif endif endif diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 216bdb2..454d5d5 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -53,6 +53,12 @@ #define cpu_relax() asm volatile("" ::: "memory") #endif +#ifdef __ia64__ +#include "../../arch/ia64/include/asm/unistd.h" +#define rmb() asm volatile ("mf" ::: "memory") +#define cpu_relax() asm volatile ("hint @pause" ::: "memory") +#endif + #include #include #include -- cgit v0.10.2 From cfc10d3bcc50d70f72c0f43d03eee965c726ccc0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 15:40:53 -0200 Subject: perf symbols: Add a long_name_len member to struct dso MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using a two bytes hole we already had and since we also need to calculate this strlen for fetching the buildids. We'll use it in 'perf top' to auto-adjust the output based on the terminal width. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258479655-28662-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1b77e81..5cc96c8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -109,13 +109,24 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp) self->start, self->end, self->name); } +static void dso__set_long_name(struct dso *self, char *name) +{ + self->long_name = name; + self->long_name_len = strlen(name); +} + +static void dso__set_basename(struct dso *self) +{ + self->short_name = basename(self->long_name); +} + struct dso *dso__new(const char *name) { struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); if (self != NULL) { strcpy(self->name, name); - self->long_name = self->name; + dso__set_long_name(self, self->name); self->short_name = self->name; self->syms = RB_ROOT; self->find_symbol = dso__find_symbol; @@ -888,7 +899,7 @@ bool fetch_build_id_table(struct list_head *head) continue; have_buildid = true; memset(&b.header, 0, sizeof(b.header)); - len = strlen(pos->long_name) + 1; + len = pos->long_name_len + 1; len = ALIGN(len, 64); b.header.size = sizeof(b) + len; @@ -1165,6 +1176,7 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) dso_name[PATH_MAX]; struct map *map; struct rb_node *last; + char *long_name; if (dot == NULL || strcmp(dot, ".ko")) continue; @@ -1179,9 +1191,11 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) snprintf(path, sizeof(path), "%s/%s", dirname, dent->d_name); - map->dso->long_name = strdup(path); - if (map->dso->long_name == NULL) + long_name = strdup(path); + if (long_name == NULL) goto failure; + dso__set_long_name(map->dso, long_name); + dso__set_basename(map->dso); err = dso__load_module_sym(map->dso, map, filter); if (err < 0) @@ -1420,8 +1434,10 @@ struct dso *dsos__findnew(const char *name) if (!dso) { dso = dso__new(name); - if (dso != NULL) + if (dso != NULL) { dsos__add(dso); + dso__set_basename(dso); + } } return dso; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 51c5a4a..5ad1019 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -66,6 +66,7 @@ struct dso { u8 has_build_id:1; unsigned char origin; u8 build_id[BUILD_ID_SIZE]; + u16 long_name_len; const char *short_name; char *long_name; char name[0]; -- cgit v0.10.2 From 13cc5079f235906e60577dbce8da2f9607e67e93 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 15:40:54 -0200 Subject: perf top: Auto adjust symbol and dso widths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We pre-calculate the symbol name length, then after we sort the entries to print, calculate the biggest one and use that for the symbol name width justification, then use the dso->long_name->len to justificate the DSO name, deciding whether using the short or long name depending on how much space we have on the terminal. IOW give as much info to the user as the terminal width allows. Suggested-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258479655-28662-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 89b7f68..a368978 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -78,6 +78,14 @@ static int dump_symtab = 0; static bool hide_kernel_symbols = false; static bool hide_user_symbols = false; +static struct winsize winsize; +static const char *graph_line = + "_____________________________________________________________________" + "_____________________________________________________________________"; +static const char *graph_dotted_line = + "---------------------------------------------------------------------" + "---------------------------------------------------------------------" + "---------------------------------------------------------------------"; /* * Source @@ -107,6 +115,7 @@ struct sym_entry { unsigned long snap_count; double weight; int skip; + u16 name_len; u8 origin; struct map *map; struct source_line *source; @@ -119,34 +128,40 @@ struct sym_entry { * Source functions */ -/* most GUI terminals set LINES (although some don't export it) */ -static int term_rows(void) +static void get_term_dimensions(struct winsize *ws) { - char *lines_string = getenv("LINES"); - int n_lines; - - if (lines_string && (n_lines = atoi(lines_string)) > 0) - return n_lines; -#ifdef TIOCGWINSZ - else { - struct winsize ws; - if (!ioctl(1, TIOCGWINSZ, &ws) && ws.ws_row) - return ws.ws_row; + char *s = getenv("LINES"); + + if (s != NULL) { + ws->ws_row = atoi(s); + s = getenv("COLUMNS"); + if (s != NULL) { + ws->ws_col = atoi(s); + if (ws->ws_row && ws->ws_col) + return; + } } +#ifdef TIOCGWINSZ + if (ioctl(1, TIOCGWINSZ, ws) == 0 && + ws->ws_row && ws->ws_col) + return; #endif - return 25; + ws->ws_row = 25; + ws->ws_col = 80; } -static void update_print_entries(void) +static void update_print_entries(struct winsize *ws) { - print_entries = term_rows(); + print_entries = ws->ws_row; + if (print_entries > 9) print_entries -= 9; } static void sig_winch_handler(int sig __used) { - update_print_entries(); + get_term_dimensions(&winsize); + update_print_entries(&winsize); } static void parse_source(struct sym_entry *syme) @@ -423,6 +438,8 @@ static void print_sym_table(void) struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; + int sym_width = 0, dso_width; + const int win_width = winsize.ws_col - 1; samples = userspace_samples = 0; @@ -434,6 +451,7 @@ static void print_sym_table(void) list_for_each_entry_safe_from(syme, n, &active_symbols, node) { syme->snap_count = syme->count[snap]; if (syme->snap_count != 0) { + if ((hide_user_symbols && syme->origin == PERF_RECORD_MISC_USER) || (hide_kernel_symbols && @@ -453,8 +471,7 @@ static void print_sym_table(void) puts(CONSOLE_CLEAR); - printf( -"------------------------------------------------------------------------------\n"); + printf("%-*.*s\n", win_width, win_width, graph_dotted_line); printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", samples_per_sec, 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); @@ -492,26 +509,44 @@ static void print_sym_table(void) printf(", %d CPUs)\n", nr_cpus); } - printf("------------------------------------------------------------------------------\n\n"); + printf("%-*.*s\n\n", win_width, win_width, graph_dotted_line); if (sym_filter_entry) { show_details(sym_filter_entry); return; } + /* + * Find the longest symbol name that will be displayed + */ + for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { + syme = rb_entry(nd, struct sym_entry, rb_node); + if (++printed > print_entries || + (int)syme->snap_count < count_filter) + continue; + + if (syme->name_len > sym_width) + sym_width = syme->name_len; + } + + printed = 0; + if (nr_counters == 1) printf(" samples pcnt"); else printf(" weight samples pcnt"); + dso_width = winsize.ws_col - sym_width - 29; + if (verbose) printf(" RIP "); - printf(" function DSO\n"); + printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); printf(" %s _______ _____", nr_counters == 1 ? " " : "______"); if (verbose) printf(" ________________"); - printf(" ________________________________ ________________\n\n"); + printf(" %-*.*s %-*.*s\n\n", sym_width, sym_width, graph_line, + dso_width, dso_width, graph_line); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct symbol *sym; @@ -534,8 +569,11 @@ static void print_sym_table(void) percent_color_fprintf(stdout, "%4.1f%%", pcnt); if (verbose) printf(" %016llx", sym->start); - printf(" %-32s", sym->name); - printf(" %s", syme->map->dso->short_name); + printf(" %-*.*s", sym_width, sym_width, sym->name); + printf(" %-*.*s", dso_width, dso_width, + dso_width >= syme->map->dso->long_name_len ? + syme->map->dso->long_name : + syme->map->dso->short_name); printf("\n"); } } @@ -718,7 +756,7 @@ static void handle_keypress(int c) case 'e': prompt_integer(&print_entries, "Enter display entries (lines)"); if (print_entries == 0) { - update_print_entries(); + sig_winch_handler(SIGWINCH); signal(SIGWINCH, sig_winch_handler); } else signal(SIGWINCH, SIG_DFL); @@ -862,6 +900,9 @@ static int symbol_filter(struct map *map, struct symbol *sym) } } + if (!syme->skip) + syme->name_len = strlen(sym->name); + return 0; } @@ -1301,8 +1342,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (target_pid != -1 || profile_cpu != -1) nr_cpus = 1; + get_term_dimensions(&winsize); if (print_entries == 0) { - update_print_entries(); + update_print_entries(&winsize); signal(SIGWINCH, sig_winch_handler); } -- cgit v0.10.2 From 1a105f743d9fa5f7b8eeeca0afb789951164a361 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 15:40:55 -0200 Subject: perf top: Suppress DSO column if only one is present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E.g. [root@doppio ~]# perf top -U --------------------------------------------------------------------------- PerfTop: 482 irqs/sec kernel:100.0% [1000Hz cycles], (all, 2 CPUs) --------------------------------------------------------------------------- DSO: vmlinux samples pcnt function _______ _____ _________________________ 471.00 47.9% read_hpet 57.00 5.8% acpi_os_read_port 30.00 3.1% hpet_next_event 30.00 3.1% find_busiest_group 22.00 2.2% schedule 18.00 1.8% sched_clock_local 14.00 1.4% _spin_lock_irqsave 14.00 1.4% native_read_tsc 13.00 1.3% trace_hardirqs_off 9.00 0.9% fget_light 9.00 0.9% ioread8 8.00 0.8% do_sys_poll Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258479655-28662-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a368978..6db0e37 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -438,8 +438,9 @@ static void print_sym_table(void) struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; - int sym_width = 0, dso_width; + int sym_width = 0, dso_width = 0; const int win_width = winsize.ws_col - 1; + struct dso *unique_dso = NULL, *first_dso = NULL; samples = userspace_samples = 0; @@ -509,7 +510,7 @@ static void print_sym_table(void) printf(", %d CPUs)\n", nr_cpus); } - printf("%-*.*s\n\n", win_width, win_width, graph_dotted_line); + printf("%-*.*s\n", win_width, win_width, graph_dotted_line); if (sym_filter_entry) { show_details(sym_filter_entry); @@ -525,28 +526,47 @@ static void print_sym_table(void) (int)syme->snap_count < count_filter) continue; + if (first_dso == NULL) + unique_dso = first_dso = syme->map->dso; + else if (syme->map->dso != first_dso) + unique_dso = NULL; + + if (syme->map->dso->long_name_len > dso_width) + dso_width = syme->map->dso->long_name_len; + if (syme->name_len > sym_width) sym_width = syme->name_len; } printed = 0; + if (unique_dso) + printf("DSO: %s\n", unique_dso->long_name); + else { + int max_dso_width = winsize.ws_col - sym_width - 29; + if (dso_width > max_dso_width) + dso_width = max_dso_width; + putchar('\n'); + } if (nr_counters == 1) printf(" samples pcnt"); else printf(" weight samples pcnt"); - dso_width = winsize.ws_col - sym_width - 29; - if (verbose) printf(" RIP "); - printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); + printf(" %-*.*s", sym_width, sym_width, "function"); + if (!unique_dso) + printf(" DSO"); + putchar('\n'); printf(" %s _______ _____", nr_counters == 1 ? " " : "______"); if (verbose) printf(" ________________"); - printf(" %-*.*s %-*.*s\n\n", sym_width, sym_width, graph_line, - dso_width, dso_width, graph_line); + printf(" %-*.*s", sym_width, sym_width, graph_line); + if (!unique_dso) + printf(" %-*.*s", dso_width, dso_width, graph_line); + puts("\n"); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct symbol *sym; @@ -570,10 +590,11 @@ static void print_sym_table(void) if (verbose) printf(" %016llx", sym->start); printf(" %-*.*s", sym_width, sym_width, sym->name); - printf(" %-*.*s", dso_width, dso_width, - dso_width >= syme->map->dso->long_name_len ? - syme->map->dso->long_name : - syme->map->dso->short_name); + if (!unique_dso) + printf(" %-*.*s", dso_width, dso_width, + dso_width >= syme->map->dso->long_name_len ? + syme->map->dso->long_name : + syme->map->dso->short_name); printf("\n"); } } -- cgit v0.10.2 From 51a472decb845e920137284a5cfef51fb7d61206 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 18:38:00 -0200 Subject: perf top: Introduce helper function to access symbol from sym_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258490282-1821-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6db0e37..0d60c51 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -128,6 +128,11 @@ struct sym_entry { * Source functions */ +static inline struct symbol *sym_entry__symbol(struct sym_entry *self) +{ + return (struct symbol *)(self + 1); +} + static void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -181,7 +186,7 @@ static void parse_source(struct sym_entry *syme) goto out_assign; } - sym = (struct symbol *)(syme + 1); + sym = sym_entry__symbol(syme); map = syme->map; path = map->dso->long_name; @@ -276,7 +281,7 @@ out_unlock: static void lookup_sym_source(struct sym_entry *syme) { - struct symbol *symbol = (struct symbol *)(syme + 1); + struct symbol *symbol = sym_entry__symbol(syme); struct source_line *line; char pattern[PATH_MAX]; @@ -325,7 +330,7 @@ static void show_details(struct sym_entry *syme) if (!syme->source) return; - symbol = (struct symbol *)(syme + 1); + symbol = sym_entry__symbol(syme); printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); @@ -573,7 +578,7 @@ static void print_sym_table(void) double pcnt; syme = rb_entry(nd, struct sym_entry, rb_node); - sym = (struct symbol *)(syme + 1); + sym = sym_entry__symbol(syme); if (++printed > print_entries || (int)syme->snap_count < count_filter) continue; @@ -661,7 +666,7 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) pthread_mutex_unlock(&active_symbols_lock); list_for_each_entry_safe_from(syme, n, &active_symbols, node) { - struct symbol *sym = (struct symbol *)(syme + 1); + struct symbol *sym = sym_entry__symbol(syme); if (!strcmp(buf, sym->name)) { found = syme; @@ -685,7 +690,7 @@ static void print_mapped_keys(void) char *name = NULL; if (sym_filter_entry) { - struct symbol *sym = (struct symbol *)(sym_filter_entry+1); + struct symbol *sym = sym_entry__symbol(sym_filter_entry); name = sym->name; } -- cgit v0.10.2 From 5a8e5a3065bf04b7673262fd6c46123e4b888d2b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 18:38:01 -0200 Subject: perf top: Allocate space only for the number of counters used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reducing memory consumption on a typical desktop machine: From: 32710 root 20 0 172m 142m 1056 S 0.0 4.7 0:00.37 perf To: 420 root 20 0 47528 16m 1056 R 0.3 0.5 0:00.24 perf Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258490282-1821-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0d60c51..49cf876 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -111,7 +111,6 @@ static int display_weighted = -1; struct sym_entry { struct rb_node rb_node; struct list_head node; - unsigned long count[MAX_COUNTERS]; unsigned long snap_count; double weight; int skip; @@ -122,6 +121,7 @@ struct sym_entry { struct source_line *lines; struct source_line **lines_tail; pthread_mutex_t source_lock; + unsigned long count[0]; }; /* @@ -130,7 +130,7 @@ struct sym_entry { static inline struct symbol *sym_entry__symbol(struct sym_entry *self) { - return (struct symbol *)(self + 1); + return ((void *)self) + symbol__priv_size; } static void get_term_dimensions(struct winsize *ws) @@ -1314,8 +1314,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) { int counter; - symbol__init(sizeof(struct sym_entry)); - page_size = sysconf(_SC_PAGE_SIZE); argc = parse_options(argc, argv, options, top_usage, 0); @@ -1332,6 +1330,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (!nr_counters) nr_counters = 1; + symbol__init(sizeof(struct sym_entry) + + (nr_counters + 1) * sizeof(unsigned long)); + if (delay_secs < 1) delay_secs = 1; -- cgit v0.10.2 From b269876c8d57fb8c801bea1fc34b461646c5abd0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 Nov 2009 18:38:02 -0200 Subject: perf top: Don't allocate the source parsing members upfront MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defer to parse_source() time allocating it. Now we use about this much memory: 1724 root 20 0 42104 10m 940 S 0.0 0.4 0:00.23 perf Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258490282-1821-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 49cf876..07b92c3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -108,6 +108,13 @@ static int display_weighted = -1; * Symbols */ +struct sym_entry_source { + struct source_line *source; + struct source_line *lines; + struct source_line **lines_tail; + pthread_mutex_t lock; +}; + struct sym_entry { struct rb_node rb_node; struct list_head node; @@ -117,10 +124,7 @@ struct sym_entry { u16 name_len; u8 origin; struct map *map; - struct source_line *source; - struct source_line *lines; - struct source_line **lines_tail; - pthread_mutex_t source_lock; + struct sym_entry_source *src; unsigned long count[0]; }; @@ -172,6 +176,7 @@ static void sig_winch_handler(int sig __used) static void parse_source(struct sym_entry *syme) { struct symbol *sym; + struct sym_entry_source *source; struct map *map; FILE *file; char command[PATH_MAX*2]; @@ -181,8 +186,17 @@ static void parse_source(struct sym_entry *syme) if (!syme) return; - if (syme->lines) { - pthread_mutex_lock(&syme->source_lock); + if (syme->src == NULL) { + syme->src = calloc(1, sizeof(*source)); + if (syme->src == NULL) + return; + pthread_mutex_init(&syme->src->lock, NULL); + } + + source = syme->src; + + if (source->lines) { + pthread_mutex_lock(&source->lock); goto out_assign; } @@ -202,8 +216,8 @@ static void parse_source(struct sym_entry *syme) if (!file) return; - pthread_mutex_lock(&syme->source_lock); - syme->lines_tail = &syme->lines; + pthread_mutex_lock(&source->lock); + source->lines_tail = &source->lines; while (!feof(file)) { struct source_line *src; size_t dummy = 0; @@ -223,8 +237,8 @@ static void parse_source(struct sym_entry *syme) *c = 0; src->next = NULL; - *syme->lines_tail = src; - syme->lines_tail = &src->next; + *source->lines_tail = src; + source->lines_tail = &src->next; if (strlen(src->line)>8 && src->line[8] == ':') { src->eip = strtoull(src->line, NULL, 16); @@ -238,7 +252,7 @@ static void parse_source(struct sym_entry *syme) pclose(file); out_assign: sym_filter_entry = syme; - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&source->lock); } static void __zero_source_counters(struct sym_entry *syme) @@ -246,7 +260,7 @@ static void __zero_source_counters(struct sym_entry *syme) int i; struct source_line *line; - line = syme->lines; + line = syme->src->lines; while (line) { for (i = 0; i < nr_counters; i++) line->count[i] = 0; @@ -261,13 +275,13 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) if (syme != sym_filter_entry) return; - if (pthread_mutex_trylock(&syme->source_lock)) + if (pthread_mutex_trylock(&syme->src->lock)) return; - if (!syme->source) + if (syme->src == NULL || syme->src->source == NULL) goto out_unlock; - for (line = syme->lines; line; line = line->next) { + for (line = syme->src->lines; line; line = line->next) { if (line->eip == ip) { line->count[counter]++; break; @@ -276,7 +290,7 @@ static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) break; } out_unlock: - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); } static void lookup_sym_source(struct sym_entry *syme) @@ -287,14 +301,14 @@ static void lookup_sym_source(struct sym_entry *syme) sprintf(pattern, "<%s>:", symbol->name); - pthread_mutex_lock(&syme->source_lock); - for (line = syme->lines; line; line = line->next) { + pthread_mutex_lock(&syme->src->lock); + for (line = syme->src->lines; line; line = line->next) { if (strstr(line->line, pattern)) { - syme->source = line; + syme->src->source = line; break; } } - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); } static void show_lines(struct source_line *queue, int count, int total) @@ -324,24 +338,24 @@ static void show_details(struct sym_entry *syme) if (!syme) return; - if (!syme->source) + if (!syme->src->source) lookup_sym_source(syme); - if (!syme->source) + if (!syme->src->source) return; symbol = sym_entry__symbol(syme); printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); - pthread_mutex_lock(&syme->source_lock); - line = syme->source; + pthread_mutex_lock(&syme->src->lock); + line = syme->src->source; while (line) { total += line->count[sym_counter]; line = line->next; } - line = syme->source; + line = syme->src->source; while (line) { float pcnt = 0.0; @@ -366,7 +380,7 @@ static void show_details(struct sym_entry *syme) line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; line = line->next; } - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); if (more) printf("%d lines not displayed, maybe increase display entries [e]\n", more); } @@ -647,10 +661,10 @@ static void prompt_symbol(struct sym_entry **target, const char *msg) /* zero counters of active symbol */ if (syme) { - pthread_mutex_lock(&syme->source_lock); + pthread_mutex_lock(&syme->src->lock); __zero_source_counters(syme); *target = NULL; - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); } fprintf(stdout, "\n%s: ", msg); @@ -826,10 +840,10 @@ static void handle_keypress(int c) else { struct sym_entry *syme = sym_filter_entry; - pthread_mutex_lock(&syme->source_lock); + pthread_mutex_lock(&syme->src->lock); sym_filter_entry = NULL; __zero_source_counters(syme); - pthread_mutex_unlock(&syme->source_lock); + pthread_mutex_unlock(&syme->src->lock); } break; case 'U': @@ -915,7 +929,7 @@ static int symbol_filter(struct map *map, struct symbol *sym) syme = symbol__priv(sym); syme->map = map; - pthread_mutex_init(&syme->source_lock, NULL); + syme->src = NULL; if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) sym_filter_entry = syme; -- cgit v0.10.2 From 827f3b4974c5db2968d4979fe6a0ae00ab37bdd8 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Wed, 18 Nov 2009 00:20:09 +0900 Subject: perf bench: Add memcpy() benchmark 'perf bench mem memcpy' is a benchmark suite for measuring memcpy() performance. Example on a Intel(R) Core(TM)2 Duo CPU E6850 @ 3.00GHz: | % perf bench mem memcpy -l 1GB | # Running mem/memcpy benchmark... | # Copying 1MB Bytes from 0xb7d98008 to 0xb7e99008 ... | | 726.216412 MB/Sec Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1258471212-30281-1-git-send-email-mitake@dcl.info.waseda.ac.jp> [ v2: updated changelog, clarified history of builtin-bench.c ] Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3f0666a..53e663a 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -432,6 +432,7 @@ BUILTIN_OBJS += builtin-bench.o # Benchmark modules BUILTIN_OBJS += bench/sched-messaging.o BUILTIN_OBJS += bench/sched-pipe.o +BUILTIN_OBJS += bench/mem-memcpy.o BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-sched.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 9fbd8d7..f7781c6 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -3,6 +3,7 @@ extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); +extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c new file mode 100644 index 0000000..d4f4f98 --- /dev/null +++ b/tools/perf/bench/mem-memcpy.c @@ -0,0 +1,186 @@ +/* + * mem-memcpy.c + * + * memcpy: Simple memory copy in various ways + * + * Written by Hitoshi Mitake + */ +#include + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/string.h" +#include "../util/header.h" +#include "bench.h" + +#include +#include +#include +#include +#include + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int use_clock = 0; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_END() +}; + +struct routine { + const char *name; + const char *desc; + void * (*fn)(void *dst, const void *src, size_t len); +}; + +struct routine routines[] = { + { "default", + "Default memcpy() provided by glibc", + memcpy }, + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy ", + NULL +}; + +static int clock_fd; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __used) +{ + int i; + void *dst, *src; + size_t length; + double bps = 0.0; + struct timeval tv_start, tv_end, tv_diff; + u64 clock_start, clock_end, clock_diff; + + clock_start = clock_end = clock_diff = 0ULL; + argc = parse_options(argc, argv, options, + bench_mem_memcpy_usage, 0); + + tv_diff.tv_sec = 0; + tv_diff.tv_usec = 0; + length = (size_t)perf_atoll((char *)length_str); + if ((long long int)length <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + dst = calloc(length, sizeof(char)); + assert(dst); + src = calloc(length, sizeof(char)); + assert(src); + + if (bench_format == BENCH_FORMAT_DEFAULT) { + printf("# Copying %s Bytes from %p to %p ...\n\n", + length_str, src, dst); + } + + if (use_clock) { + init_clock(); + clock_start = get_clock(); + } else + BUG_ON(gettimeofday(&tv_start, NULL)); + + routines[i].fn(dst, src, length); + + if (use_clock) { + clock_end = get_clock(); + clock_diff = clock_end - clock_start; + } else { + BUG_ON(gettimeofday(&tv_end, NULL)); + timersub(&tv_end, &tv_start, &tv_diff); + bps = (double)((double)length / timeval2double(&tv_diff)); + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)clock_diff / (double)length); + } else { + if (bps < K) + printf(" %14lf B/Sec\n", bps); + else if (bps < K * K) + printf(" %14lfd KB/Sec\n", bps / 1024); + else if (bps < K * K * K) + printf(" %14lf MB/Sec\n", bps / 1024 / 1024); + else { + printf(" %14lf GB/Sec\n", + bps / 1024 / 1024 / 1024); + } + } + break; + case BENCH_FORMAT_SIMPLE: + if (use_clock) { + printf("%14lf\n", + (double)clock_diff / (double)length); + } else + printf("%lf\n", bps); + break; + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 90c39ba..e043eb8 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -12,6 +12,7 @@ * * Available subsystem list: * sched ... scheduler and IPC mechanism + * mem ... memory access performance * */ @@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = { NULL } }; +static struct bench_suite mem_suites[] = { + { "memcpy", + "Simple memory copy in various ways", + bench_mem_memcpy }, + { NULL, + NULL, + NULL } +}; + struct bench_subsys { const char *name; const char *summary; @@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = { { "sched", "scheduler and IPC mechanism", sched_suites }, + { "mem", + "memory access performance", + mem_suites }, { NULL, NULL, - NULL } + NULL } }; static void dump_suites(int subsys_index) -- cgit v0.10.2 From d3379ab9050e5522da2aac53d413651fc06be562 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Nov 2009 20:20:50 -0200 Subject: perf symbols: Fix comparision of build_ids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we read the build_id from the DSO name to then index into /usr/lib/debug/.buildid/DSO_BUILD_ID[0:2]/DSO_BUILD_ID[2:], we were jumping directly to the comparision with the buildid we already have in dso->build_id (that came from the perf.data build_id section, collected at perf record time) unconditionally, even if we didn't had recorded it, and furthermore, comparing a formatted buildid with a rawbuildid, yikes. Fix it by deleting the dso__read_build_id() function, that was really misdesigned anyway, and do the necessary checks and correct comparison of raw buildids. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258582853-8579-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5cc96c8..594f36a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -962,25 +962,6 @@ out: return err; } -static char *dso__read_build_id(struct dso *self) -{ - int len; - char *build_id = NULL; - unsigned char rawbf[BUILD_ID_SIZE]; - - len = filename__read_build_id(self->long_name, rawbf, sizeof(rawbf)); - if (len < 0) - goto out; - - build_id = malloc(len * 2 + 1); - if (build_id == NULL) - goto out; - - build_id__sprintf(rawbf, len, build_id); -out: - return build_id; -} - char dso__symtab_origin(const struct dso *self) { static const char origin[] = { @@ -1001,7 +982,8 @@ char dso__symtab_origin(const struct dso *self) int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) { int size = PATH_MAX; - char *name = malloc(size), *build_id = NULL; + char *name = malloc(size); + u8 build_id[BUILD_ID_SIZE]; int ret = -1; int fd; @@ -1023,8 +1005,6 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) more: do { - int berr = 0; - self->origin++; switch (self->origin) { case DSO__ORIG_FEDORA: @@ -1036,12 +1016,18 @@ more: self->long_name); break; case DSO__ORIG_BUILDID: - build_id = dso__read_build_id(self); - if (build_id != NULL) { + if (filename__read_build_id(self->long_name, build_id, + sizeof(build_id))) { + char build_id_hex[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(build_id, sizeof(build_id), + build_id_hex); snprintf(name, size, "/usr/lib/debug/.build-id/%.2s/%s.debug", - build_id, build_id + 2); - goto compare_build_id; + build_id_hex, build_id_hex + 2); + if (self->has_build_id) + goto compare_build_id; + break; } self->origin++; /* Fall thru */ @@ -1054,18 +1040,12 @@ more: } if (self->has_build_id) { - bool match; - build_id = malloc(BUILD_ID_SIZE); - if (build_id == NULL) + if (filename__read_build_id(name, build_id, + sizeof(build_id)) < 0) goto more; - berr = filename__read_build_id(name, build_id, - BUILD_ID_SIZE); compare_build_id: - match = berr > 0 && memcmp(build_id, self->build_id, - sizeof(self->build_id)) == 0; - free(build_id); - build_id = NULL; - if (!match) + if (memcmp(build_id, self->build_id, + sizeof(self->build_id)) != 0) goto more; } -- cgit v0.10.2 From e30a3d12ddf04add3268bfceb0e57ffe47f254c6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Nov 2009 20:20:51 -0200 Subject: perf symbols: Kill struct build_id_list and die() another day MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need for this struct and its allocations, we can just use the ->build_id member we already have in struct dso, then ask for it to be read, and later traverse the dsos list, writing the buildid table to the perf.data file. As a bonus, one more die() function got killed. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258582853-8579-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1f771ce..34c6fcb 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -69,13 +69,6 @@ struct build_id_event { char filename[]; }; -struct build_id_list { - struct build_id_event event; - struct list_head list; - const char *dso_name; - int len; -}; - typedef union event_union { struct perf_event_header header; struct ip_event ip; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b01a953..31731f1 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -176,18 +176,24 @@ static int do_write(int fd, const void *buf, size_t size) return 0; } -static int write_buildid_table(int fd, struct list_head *id_head) +static int dsos__write_buildid_table(int fd) { - struct build_id_list *iter, *next; - - list_for_each_entry_safe(iter, next, id_head, list) { - struct build_id_event *b = &iter->event; - - if (do_write(fd, b, sizeof(*b)) < 0 || - do_write(fd, iter->dso_name, iter->len) < 0) + struct dso *pos; + + list_for_each_entry(pos, &dsos, node) { + struct build_id_event b; + size_t len; + + if (!pos->has_build_id) + continue; + len = pos->long_name_len + 1; + len = ALIGN(len, 64); + memset(&b, 0, sizeof(b)); + memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); + b.header.size = sizeof(b) + len; + if (do_write(fd, &b, sizeof(b)) < 0 || + do_write(fd, pos->long_name, len) < 0) return -1; - list_del(&iter->list); - free(iter); } return 0; @@ -196,14 +202,13 @@ static int write_buildid_table(int fd, struct list_head *id_head) static void perf_header__adds_write(struct perf_header *self, int fd) { - LIST_HEAD(id_list); int nr_sections; struct perf_file_section *feat_sec; int sec_size; u64 sec_start; int idx = 0; - if (fetch_build_id_table(&id_list)) + if (dsos__read_build_ids()) perf_header__set_feat(self, HEADER_BUILD_ID); nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); @@ -238,7 +243,7 @@ perf_header__adds_write(struct perf_header *self, int fd) /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); - if (write_buildid_table(fd, &id_list) < 0) + if (dsos__write_buildid_table(fd) < 0) die("failed to write buildid table"); buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 594f36a..946ec31 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -883,38 +883,19 @@ out_close: return err; } -bool fetch_build_id_table(struct list_head *head) +bool dsos__read_build_ids(void) { - bool have_buildid = false; + bool have_build_id = false; struct dso *pos; - list_for_each_entry(pos, &dsos, node) { - struct build_id_list *new; - struct build_id_event b; - size_t len; - - if (filename__read_build_id(pos->long_name, - &b.build_id, - sizeof(b.build_id)) < 0) - continue; - have_buildid = true; - memset(&b.header, 0, sizeof(b.header)); - len = pos->long_name_len + 1; - len = ALIGN(len, 64); - b.header.size = sizeof(b) + len; - - new = malloc(sizeof(*new)); - if (!new) - die("No memory\n"); - - memcpy(&new->event, &b, sizeof(b)); - new->dso_name = pos->long_name; - new->len = len; - - list_add_tail(&new->list, head); - } + list_for_each_entry(pos, &dsos, node) + if (filename__read_build_id(pos->long_name, pos->build_id, + sizeof(pos->build_id)) > 0) { + have_build_id = true; + pos->has_build_id = true; + } - return have_buildid; + return have_build_id; } int filename__read_build_id(const char *filename, void *bf, size_t size) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5ad1019..546eb76 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -89,7 +89,7 @@ char dso__symtab_origin(const struct dso *self); void dso__set_build_id(struct dso *self, void *build_id); int filename__read_build_id(const char *filename, void *bf, size_t size); -bool fetch_build_id_table(struct list_head *head); +bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); int load_kernel(symbol_filter_t filter); -- cgit v0.10.2 From f1617b40596cb341ee6602a9d969c5e4cebe9260 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Nov 2009 20:20:52 -0200 Subject: perf symbols: Record the build_ids of kernel modules too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [root@doppio linux-2.6-tip]# perf record -a sleep 2s;perf buildid-list|tail [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.162 MB perf.data (~7078 samples) ] 881588fa57b3c1696bc91e5e804a11304f093535 [cfg80211] 4d47ce1da9d16bad00c962c072451b7c681e82df [snd_page_alloc] 5146377e89a7caac617f9782f1a02e46263d3a31 [rfkill] 2153b937bff0d345fea83b63a2e1d3138569f83d [i915] 4e6fb1bb97362e3ee4d306988b9ad6912d5fb9ae [drm_kms_helper] f56ef2bf853e3a798f0d8d51f797622e5dc4420e [drm] b0d157a3b5c4e017329ffc07c64623cd6ad65e95 [i2c_algo_bit] 8125374b905ef9fa8b65d98e166b008ad952f198 [i2c_core] fc875c6e5a90e7b915e9d445d0efc859e1b2678c [video] 4b43c5006589f977e9762fdfc7ac1a92b72fca52 [output] [root@doppio linux-2.6-tip]# elfutils libdwfl/linux-kernel-modules.c was used as reference, as suggested by Roland McGrath. Signed-off-by: Arnaldo Carvalho de Melo Cc: Roland McGrath Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258582853-8579-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 31731f1..d3d656f 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -241,6 +241,11 @@ perf_header__adds_write(struct perf_header *self, int fd) buildid_sec = &feat_sec[idx++]; + /* + * Read the list of loaded modules with its build_ids + */ + dsos__load_modules(); + /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); if (dsos__write_buildid_table(fd) < 0) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 946ec31..7b4cede 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -9,6 +9,7 @@ #include #include #include +#include #include enum dso_origin { @@ -943,6 +944,50 @@ out: return err; } +int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +{ + int fd, err = -1; + + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + while (1) { + char bf[BUFSIZ]; + GElf_Nhdr nhdr; + int namesz, descsz; + + if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) + break; + + namesz = (nhdr.n_namesz + 3) & -4U; + descsz = (nhdr.n_descsz + 3) & -4U; + if (nhdr.n_type == NT_GNU_BUILD_ID && + nhdr.n_namesz == sizeof("GNU")) { + if (read(fd, bf, namesz) != namesz) + break; + if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { + if (read(fd, build_id, + BUILD_ID_SIZE) == BUILD_ID_SIZE) { + err = 0; + break; + } + } else if (read(fd, bf, descsz) != descsz) + break; + } else { + int n = namesz + descsz; + if (read(fd, bf, n) != n) + break; + } + } + close(fd); +out: + return err; +} + char dso__symtab_origin(const struct dso *self) { static const char origin[] = { @@ -1218,7 +1263,7 @@ static struct map *map__new2(u64 start, struct dso *dso) return self; } -static int dsos__load_modules(void) +int dsos__load_modules(void) { char *line = NULL; size_t n; @@ -1268,6 +1313,12 @@ static int dsos__load_modules(void) goto out_delete_line; } + snprintf(name, sizeof(name), + "/sys/module/%s/notes/.note.gnu.build-id", line); + if (sysfs__read_build_id(name, dso->build_id, + sizeof(dso->build_id)) == 0) + dso->has_build_id = true; + dso->origin = DSO__ORIG_KMODULE; kernel_maps__insert(map); dsos__add(dso); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 546eb76..da7ec1a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -78,6 +78,7 @@ void dso__delete(struct dso *self); struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, int modules); +int dsos__load_modules(void); struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); void dsos__fprintf(FILE *fp); @@ -89,6 +90,7 @@ char dso__symtab_origin(const struct dso *self); void dso__set_build_id(struct dso *self, void *build_id); int filename__read_build_id(const char *filename, void *bf, size_t size); +int sysfs__read_build_id(const char *filename, void *bf, size_t size); bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); -- cgit v0.10.2 From 2446042c93bfc6eeebfc89e88fdef2435d2bb5c4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 18 Nov 2009 20:20:53 -0200 Subject: perf symbols: Capture the running kernel buildid too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [root@doppio linux-2.6-tip]# perf record -a -f sleep 3s ; perf buildid-list | grep vmlinux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.171 MB perf.data (~7489 samples) ] 18e7cc53db62a7d35e9d6f6c9ddc23017d38ee9a vmlinux [root@doppio linux-2.6-tip]# Several refactorings were needed so that we can have symmetry between dsos__load_modules() and dsos__load_kernel(), i.e. those functions will respectively create and add to the dsos list the loaded modules and kernel, with its buildids, but not load its symbols. That is something the subcomands that need will have to call dso__load_kernel_sym(), just like we do with modules with dsos__load_module_sym()/dso__load_module_sym(). Next csets will actually use this info to stop producing bogus results using mismatched vmlinux and .ko files. Signed-off-by: Arnaldo Carvalho de Melo Cc: Roland McGrath Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258582853-8579-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 07b92c3..6d770ac 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -948,7 +948,12 @@ static int symbol_filter(struct map *map, struct symbol *sym) static int parse_symbols(void) { - if (dsos__load_kernel(vmlinux_name, symbol_filter, 1) <= 0) + struct dso *kernel = dsos__load_kernel(); + + if (kernel == NULL) + return -1; + + if (dso__load_kernel_sym(kernel, symbol_filter, 1) <= 0) return -1; if (dump_symtab) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d3d656f..425a29b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -241,6 +241,7 @@ perf_header__adds_write(struct perf_header *self, int fd) buildid_sec = &feat_sec[idx++]; + dsos__load_kernel(); /* * Read the list of loaded modules with its build_ids */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 7b4cede..4d75e74 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1352,17 +1352,11 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return err; } -int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, - int use_modules) +int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, int use_modules) { int err = -1; - struct dso *dso = dso__new(vmlinux); - if (dso == NULL) - return -1; - - dso->short_name = "[kernel]"; - kernel_map = map__new2(0, dso); + kernel_map = map__new2(0, self); if (kernel_map == NULL) goto out_delete_dso; @@ -1374,39 +1368,36 @@ int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, use_modules = 0; } - if (vmlinux) { - err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter); - if (err > 0 && use_modules) { - int syms = dsos__load_modules_sym(filter); + err = dso__load_vmlinux(self, kernel_map, self->name, filter); + if (err > 0 && use_modules) { + int syms = dsos__load_modules_sym(filter); - if (syms < 0) - pr_warning("Failed to read module symbols!" - " Continuing...\n"); - else - err += syms; - } + if (syms < 0) + pr_warning("Failed to read module symbols!" + " Continuing...\n"); + else + err += syms; } if (err <= 0) err = kernel_maps__load_kallsyms(filter, use_modules); if (err > 0) { - struct rb_node *node = rb_first(&dso->syms); + struct rb_node *node = rb_first(&self->syms); struct symbol *sym = rb_entry(node, struct symbol, rb_node); kernel_map->start = sym->start; - node = rb_last(&dso->syms); + node = rb_last(&self->syms); sym = rb_entry(node, struct symbol, rb_node); kernel_map->end = sym->end; - dso->origin = DSO__ORIG_KERNEL; + self->origin = DSO__ORIG_KERNEL; kernel_maps__insert(kernel_map); /* * Now that we have all sorted out, just set the ->end of all * maps: */ kernel_maps__fixup_end(); - dsos__add(dso); if (verbose) kernel_maps__fprintf(stderr); @@ -1415,7 +1406,7 @@ int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, return err; out_delete_dso: - dso__delete(dso); + dso__delete(self); return -1; } @@ -1475,18 +1466,36 @@ size_t dsos__fprintf_buildid(FILE *fp) return ret; } -int load_kernel(symbol_filter_t filter) +struct dso *dsos__load_kernel(void) { - if (dsos__load_kernel(vmlinux_name, filter, modules) <= 0) - return -1; + struct dso *kernel = dso__new(vmlinux_name); + if (kernel == NULL) + return NULL; + + kernel->short_name = "[kernel]"; vdso = dso__new("[vdso]"); if (!vdso) - return -1; + return NULL; + + if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, + sizeof(kernel->build_id)) == 0) + kernel->has_build_id = true; + dsos__add(kernel); dsos__add(vdso); - return 0; + return kernel; +} + +int load_kernel(symbol_filter_t filter) +{ + struct dso *kernel = dsos__load_kernel(); + + if (kernel == NULL) + return -1; + + return dso__load_kernel_sym(kernel, filter, modules); } void symbol__init(unsigned int priv_size) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index da7ec1a..f0593a6 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -77,10 +77,10 @@ void dso__delete(struct dso *self); struct symbol *dso__find_symbol(struct dso *self, u64 ip); -int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, int modules); int dsos__load_modules(void); struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); +int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, int modules); void dsos__fprintf(FILE *fp); size_t dsos__fprintf_buildid(FILE *fp); @@ -94,6 +94,7 @@ int sysfs__read_build_id(const char *filename, void *bf, size_t size); bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); +struct dso *dsos__load_kernel(void); int load_kernel(symbol_filter_t filter); void symbol__init(unsigned int priv_size); -- cgit v0.10.2 From 4dc0a04bb18fe9b80cefa08694f46a3a19ebfe50 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Nov 2009 14:55:55 -0200 Subject: perf tools: perf_header__read() shouldn't die() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And also don't call the constructor in it, this way it adheres to the model the other methods follow. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258649757-17554-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 82260c5..c97cb2c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -400,7 +400,7 @@ static int __cmd_record(int argc, const char **argv) struct stat st; pid_t pid = 0; int flags; - int ret; + int err; unsigned long waking = 0; page_size = sysconf(_SC_PAGE_SIZE); @@ -434,16 +434,18 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } - if (!file_new) - header = perf_header__read(output); - else - header = perf_header__new(); - + header = perf_header__new(); if (header == NULL) { pr_err("Not enough memory for reading perf file header\n"); return -1; } + if (!file_new) { + err = perf_header__read(header, output); + if (err < 0) + return err; + } + if (raw_samples) { perf_header__set_feat(header, HEADER_TRACE_INFO); } else { @@ -527,7 +529,7 @@ static int __cmd_record(int argc, const char **argv) if (hits == samples) { if (done) break; - ret = poll(event_array, nr_poll, -1); + err = poll(event_array, nr_poll, -1); waking++; } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 665877e..dd4d82a 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1093,7 +1093,7 @@ static void process_samples(void) static int __cmd_timechart(void) { - int ret, rc = EXIT_FAILURE; + int err, rc = EXIT_FAILURE; unsigned long offset = 0; unsigned long head, shift; struct stat statbuf; @@ -1111,8 +1111,8 @@ static int __cmd_timechart(void) exit(-1); } - ret = fstat(input, &statbuf); - if (ret < 0) { + err = fstat(input, &statbuf); + if (err < 0) { perror("failed to stat file"); exit(-1); } @@ -1122,7 +1122,16 @@ static int __cmd_timechart(void) exit(0); } - header = perf_header__read(input); + header = perf_header__new(); + if (header == NULL) + return -ENOMEM; + + err = perf_header__read(header, input); + if (err < 0) { + perf_header__delete(header); + return err; + } + head = header->data_offset; sample_type = perf_header__sample_type(header); diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index 14cb846..b8fc0fa 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -106,7 +106,7 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, int *cwdlen, char **cwd) { - int ret, rc = EXIT_FAILURE; + int err, rc = EXIT_FAILURE; struct perf_header *header; unsigned long head, shift; unsigned long offset = 0; @@ -132,8 +132,8 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, exit(-1); } - ret = fstat(input, &input_stat); - if (ret < 0) { + err = fstat(input, &input_stat); + if (err < 0) { perror("failed to stat file"); exit(-1); } @@ -149,8 +149,16 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, exit(0); } - *pheader = perf_header__read(input); - header = *pheader; + header = perf_header__new(); + if (header == NULL) + return -ENOMEM; + + err = perf_header__read(header, input); + if (err < 0) { + perf_header__delete(header); + return err; + } + *pheader = header; head = header->data_offset; sample_type = perf_header__sample_type(header); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 425a29b..e66c7bd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -78,16 +78,24 @@ struct perf_header *perf_header__new(void) return self; } +void perf_header__delete(struct perf_header *self) +{ + int i; + + for (i = 0; i < self->attrs; ++i) + perf_header_attr__delete(self->attr[i]); + + free(self->attr); + free(self); +} + int perf_header__add_attr(struct perf_header *self, struct perf_header_attr *attr) { - int pos = self->attrs; - if (self->frozen) return -1; - self->attrs++; - if (self->attrs > self->size) { + if (self->attrs == self->size) { int nsize = self->size * 2; struct perf_header_attr **nattr; @@ -98,7 +106,8 @@ int perf_header__add_attr(struct perf_header *self, self->size = nsize; self->attr = nattr; } - self->attr[pos] = attr; + + self->attr[self->attrs++] = attr; return 0; } @@ -441,19 +450,17 @@ static int perf_file_section__process(struct perf_file_section *self, return 0; } -struct perf_header *perf_header__read(int fd) +int perf_header__read(struct perf_header *self, int fd) { - struct perf_header *self = perf_header__new(); struct perf_file_header f_header; struct perf_file_attr f_attr; u64 f_id; int nr_attrs, nr_ids, i, j; - if (self == NULL) - die("nomem"); - - if (perf_file_header__read(&f_header, self, fd) < 0) - die("incompatible file format"); + if (perf_file_header__read(&f_header, self, fd) < 0) { + pr_debug("incompatible file format\n"); + return -EINVAL; + } nr_attrs = f_header.attrs.size / sizeof(f_attr); lseek(fd, f_header.attrs.offset, SEEK_SET); @@ -467,7 +474,7 @@ struct perf_header *perf_header__read(int fd) attr = perf_header_attr__new(&f_attr.attr); if (attr == NULL) - die("nomem"); + return -ENOMEM; nr_ids = f_attr.ids.size / sizeof(u64); lseek(fd, f_attr.ids.offset, SEEK_SET); @@ -475,11 +482,15 @@ struct perf_header *perf_header__read(int fd) for (j = 0; j < nr_ids; j++) { do_read(fd, &f_id, sizeof(f_id)); - if (perf_header_attr__add_id(attr, f_id) < 0) - die("nomem"); + if (perf_header_attr__add_id(attr, f_id) < 0) { + perf_header_attr__delete(attr); + return -ENOMEM; + } + } + if (perf_header__add_attr(self, attr) < 0) { + perf_header_attr__delete(attr); + return -ENOMEM; } - if (perf_header__add_attr(self, attr) < 0) - die("nomem"); lseek(fd, tmp, SEEK_SET); } @@ -487,8 +498,8 @@ struct perf_header *perf_header__read(int fd) if (f_header.event_types.size) { lseek(fd, f_header.event_types.offset, SEEK_SET); events = malloc(f_header.event_types.size); - if (!events) - die("nomem"); + if (events == NULL) + return -ENOMEM; do_read(fd, events, f_header.event_types.size); event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } @@ -498,8 +509,7 @@ struct perf_header *perf_header__read(int fd) lseek(fd, self->data_offset, SEEK_SET); self->frozen = 1; - - return self; + return 0; } u64 perf_header__sample_type(struct perf_header *header) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index f46a94e..dc8fedb 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -55,7 +55,10 @@ struct perf_header { DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; -struct perf_header *perf_header__read(int fd); +struct perf_header *perf_header__new(void); +void perf_header__delete(struct perf_header *self); + +int perf_header__read(struct perf_header *self, int fd); void perf_header__write(struct perf_header *self, int fd, bool at_exit); int perf_header__add_attr(struct perf_header *self, @@ -75,8 +78,6 @@ perf_header__find_attr(u64 id, struct perf_header *header); void perf_header__set_feat(struct perf_header *self, int feat); bool perf_header__has_feat(const struct perf_header *self, int feat); -struct perf_header *perf_header__new(void); - int perf_header__process_sections(struct perf_header *self, int fd, int (*process)(struct perf_file_section *self, int feat, int fd)); -- cgit v0.10.2 From d5eed904bb6010b429b82c47e7cdb6a32f0c1343 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Nov 2009 14:55:56 -0200 Subject: perf tools: Eliminate some more die() uses in library functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This time in perf_header__adds_write, propagating the do_write error returns. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258649757-17554-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index c97cb2c..87f98fd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -474,8 +474,11 @@ static int __cmd_record(int argc, const char **argv) } } - if (file_new) - perf_header__write(header, output, false); + if (file_new) { + err = perf_header__write(header, output, false); + if (err < 0) + return err; + } if (!system_wide) event__synthesize_thread(pid, process_synthesized_event); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e66c7bd..d5c81eb 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -176,7 +176,7 @@ static int do_write(int fd, const void *buf, size_t size) int ret = write(fd, buf, size); if (ret < 0) - return -1; + return -errno; size -= ret; buf += ret; @@ -190,6 +190,7 @@ static int dsos__write_buildid_table(int fd) struct dso *pos; list_for_each_entry(pos, &dsos, node) { + int err; struct build_id_event b; size_t len; @@ -200,33 +201,35 @@ static int dsos__write_buildid_table(int fd) memset(&b, 0, sizeof(b)); memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); b.header.size = sizeof(b) + len; - if (do_write(fd, &b, sizeof(b)) < 0 || - do_write(fd, pos->long_name, len) < 0) - return -1; + err = do_write(fd, &b, sizeof(b)); + if (err < 0) + return err; + err = do_write(fd, pos->long_name, len); + if (err < 0) + return err; } return 0; } -static void -perf_header__adds_write(struct perf_header *self, int fd) +static int perf_header__adds_write(struct perf_header *self, int fd) { int nr_sections; struct perf_file_section *feat_sec; int sec_size; u64 sec_start; - int idx = 0; + int idx = 0, err; if (dsos__read_build_ids()) perf_header__set_feat(self, HEADER_BUILD_ID); nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); if (!nr_sections) - return; + return 0; feat_sec = calloc(sizeof(*feat_sec), nr_sections); - if (!feat_sec) - die("No memory"); + if (feat_sec == NULL) + return -ENOMEM; sec_size = sizeof(*feat_sec) * nr_sections; @@ -258,23 +261,29 @@ perf_header__adds_write(struct perf_header *self, int fd) /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); - if (dsos__write_buildid_table(fd) < 0) - die("failed to write buildid table"); + err = dsos__write_buildid_table(fd); + if (err < 0) { + pr_debug("failed to write buildid table\n"); + goto out_free; + } buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; } lseek(fd, sec_start, SEEK_SET); - if (do_write(fd, feat_sec, sec_size) < 0) - die("failed to write feature section"); + err = do_write(fd, feat_sec, sec_size); + if (err < 0) + pr_debug("failed to write feature section\n"); +out_free: free(feat_sec); + return err; } -void perf_header__write(struct perf_header *self, int fd, bool at_exit) +int perf_header__write(struct perf_header *self, int fd, bool at_exit) { struct perf_file_header f_header; struct perf_file_attr f_attr; struct perf_header_attr *attr; - int i; + int i, err; lseek(fd, sizeof(f_header), SEEK_SET); @@ -283,8 +292,11 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) attr = self->attr[i]; attr->id_offset = lseek(fd, 0, SEEK_CUR); - if (do_write(fd, attr->id, attr->ids * sizeof(u64)) < 0) - die("failed to write perf header"); + err = do_write(fd, attr->id, attr->ids * sizeof(u64)); + if (err < 0) { + pr_debug("failed to write perf header\n"); + return err; + } } @@ -300,20 +312,30 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) .size = attr->ids * sizeof(u64), } }; - if (do_write(fd, &f_attr, sizeof(f_attr)) < 0) - die("failed to write perf header attribute"); + err = do_write(fd, &f_attr, sizeof(f_attr)); + if (err < 0) { + pr_debug("failed to write perf header attribute\n"); + return err; + } } self->event_offset = lseek(fd, 0, SEEK_CUR); self->event_size = event_count * sizeof(struct perf_trace_event_type); - if (events) - if (do_write(fd, events, self->event_size) < 0) - die("failed to write perf header events"); + if (events) { + err = do_write(fd, events, self->event_size); + if (err < 0) { + pr_debug("failed to write perf header events\n"); + return err; + } + } self->data_offset = lseek(fd, 0, SEEK_CUR); - if (at_exit) - perf_header__adds_write(self, fd); + if (at_exit) { + err = perf_header__adds_write(self, fd); + if (err < 0) + return err; + } f_header = (struct perf_file_header){ .magic = PERF_MAGIC, @@ -336,11 +358,15 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit) memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); lseek(fd, 0, SEEK_SET); - if (do_write(fd, &f_header, sizeof(f_header)) < 0) - die("failed to write perf header"); + err = do_write(fd, &f_header, sizeof(f_header)); + if (err < 0) { + pr_debug("failed to write perf header\n"); + return err; + } lseek(fd, self->data_offset + self->data_size, SEEK_SET); self->frozen = 1; + return 0; } static void do_read(int fd, void *buf, size_t size) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index dc8fedb..d1dbe2b 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -59,7 +59,7 @@ struct perf_header *perf_header__new(void); void perf_header__delete(struct perf_header *self); int perf_header__read(struct perf_header *self, int fd); -void perf_header__write(struct perf_header *self, int fd, bool at_exit); +int perf_header__write(struct perf_header *self, int fd, bool at_exit); int perf_header__add_attr(struct perf_header *self, struct perf_header_attr *attr); -- cgit v0.10.2 From 6b0cb5f9f7033c72b19697c33deab83f0dd9848d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Nov 2009 14:55:57 -0200 Subject: perf tools: Don't die() in mmap_dispatch_perf_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Propagate the error, that, interestingly, are already handled by all callers :-) Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258649757-17554-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index b8fc0fa..5543e7d 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -106,7 +106,7 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, int *cwdlen, char **cwd) { - int err, rc = EXIT_FAILURE; + int err; struct perf_header *header; unsigned long head, shift; unsigned long offset = 0; @@ -118,64 +118,69 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, int input; char *buf; - if (!curr_handler) - die("Forgot to register perf file handler"); + if (curr_handler == NULL) { + pr_debug("Forgot to register perf file handler\n"); + return -EINVAL; + } page_size = getpagesize(); input = open(input_name, O_RDONLY); if (input < 0) { - fprintf(stderr, " failed to open file: %s", input_name); + pr_err("Failed to open file: %s", input_name); if (!strcmp(input_name, "perf.data")) - fprintf(stderr, " (try 'perf record' first)"); - fprintf(stderr, "\n"); - exit(-1); + pr_err(" (try 'perf record' first)"); + pr_err("\n"); + return -errno; } - err = fstat(input, &input_stat); - if (err < 0) { - perror("failed to stat file"); - exit(-1); + if (fstat(input, &input_stat) < 0) { + pr_err("failed to stat file"); + err = -errno; + goto out_close; } + err = -EACCES; if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user or root\n", + pr_err("file: %s not owned by current user or root\n", input_name); - exit(-1); + goto out_close; } - if (!input_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); + if (input_stat.st_size == 0) { + pr_info("zero-sized file, nothing to do!\n"); + goto done; } + err = -ENOMEM; header = perf_header__new(); if (header == NULL) - return -ENOMEM; + goto out_close; err = perf_header__read(header, input); - if (err < 0) { - perf_header__delete(header); - return err; - } + if (err < 0) + goto out_delete; *pheader = header; head = header->data_offset; sample_type = perf_header__sample_type(header); - if (curr_handler->sample_type_check) - if (curr_handler->sample_type_check(sample_type) < 0) - exit(-1); + err = -EINVAL; + if (curr_handler->sample_type_check && + curr_handler->sample_type_check(sample_type) < 0) + goto out_delete; + err = -ENOMEM; if (load_kernel(NULL) < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; + pr_err("failed to load kernel symbols\n"); + goto out_delete; } if (!full_paths) { if (getcwd(__cwd, sizeof(__cwd)) == NULL) { - perror("failed to get the current directory"); - return EXIT_FAILURE; + pr_err("failed to get the current directory\n"); + err = -errno; + goto out_delete; } *cwd = __cwd; *cwdlen = strlen(*cwd); @@ -189,11 +194,12 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, head -= shift; remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); + buf = mmap(NULL, page_size * mmap_window, PROT_READ, + MAP_SHARED, input, offset); if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); + pr_err("failed to mmap file\n"); + err = -errno; + goto out_delete; } more: @@ -250,10 +256,12 @@ more: goto more; done: - rc = EXIT_SUCCESS; + err = 0; +out_close: close(input); - return rc; + return err; +out_delete: + perf_header__delete(header); + goto out_close; } - - -- cgit v0.10.2 From ce64c62074d945fe5f8a7f01bdc30125f994ea67 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 16 Nov 2009 18:06:31 -0500 Subject: x86: Instruction decoder test should generate build warning Since some instructions are not decoded correctly by older versions of objdump, it may cause false positive error in insn decoder posttest. This changes build error of insn decoder test to build warning. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston Cc: Stephen Rothwell LKML-Reference: <20091116230631.5250.41579.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index af75e07..d8214dc 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -114,6 +114,7 @@ int main(int argc, char **argv) unsigned char insn_buf[16]; struct insn insn; int insns = 0, c; + int warnings = 0; parse_args(argc, argv); @@ -151,18 +152,22 @@ int main(int argc, char **argv) insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); if (insn.length != nb) { - fprintf(stderr, "Error: %s found a difference at %s\n", + warnings++; + fprintf(stderr, "Warning: %s found difference at %s\n", prog, sym); - fprintf(stderr, "Error: %s", line); - fprintf(stderr, "Error: objdump says %d bytes, but " + fprintf(stderr, "Warning: %s", line); + fprintf(stderr, "Warning: objdump says %d bytes, but " "insn_get_length() says %d\n", nb, insn.length); if (verbose) dump_insn(stderr, &insn); - exit(2); } } - fprintf(stderr, "Succeed: decoded and checked %d instructions\n", - insns); + if (warnings) + fprintf(stderr, "Warning: decoded and checked %d" + " instructions with %d warnings\n", insns, warnings); + else + fprintf(stderr, "Succeed: decoded and checked %d" + " instructions\n", insns); return 0; } -- cgit v0.10.2 From ba77c9e11111a172c9e8687fe16a6a173a61916f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 20 Nov 2009 15:53:25 +0800 Subject: perf: Add 'perf kmem' tool This tool is mostly a perf version of kmemtrace-user. The following information is provided by this tool: - the total amount of memory allocated and fragmentation per call-site - the total amount of memory allocated and fragmentation per allocation - total memory allocated and fragmentation in the collected dataset - ... Sample output: # ./perf kmem record ^C # ./perf kmem --stat caller --stat alloc -l 10 ------------------------------------------------------------------------------ Callsite | Total_alloc/Per | Total_req/Per | Hit | Fragmentation ------------------------------------------------------------------------------ 0xc052f37a | 790528/4096 | 790528/4096 | 193 | 0.000% 0xc0541d70 | 524288/4096 | 524288/4096 | 128 | 0.000% 0xc051cc68 | 481600/200 | 481600/200 | 2408 | 0.000% 0xc0572623 | 297444/676 | 297440/676 | 440 | 0.001% 0xc05399f1 | 73476/164 | 73472/164 | 448 | 0.005% 0xc05243bf | 51456/256 | 51456/256 | 201 | 0.000% 0xc0730d0e | 31844/497 | 31808/497 | 64 | 0.113% 0xc0734c4e | 17152/256 | 17152/256 | 67 | 0.000% 0xc0541a6d | 16384/128 | 16384/128 | 128 | 0.000% 0xc059c217 | 13120/40 | 13120/40 | 328 | 0.000% 0xc0501ee6 | 11264/88 | 11264/88 | 128 | 0.000% 0xc04daef0 | 7504/682 | 7128/648 | 11 | 5.011% 0xc04e14a3 | 4216/191 | 4216/191 | 22 | 0.000% 0xc05041ca | 3524/44 | 3520/44 | 80 | 0.114% 0xc0734fa3 | 2104/701 | 1620/540 | 3 | 23.004% 0xc05ec9f1 | 2024/289 | 2016/288 | 7 | 0.395% 0xc06a1999 | 1792/256 | 1792/256 | 7 | 0.000% 0xc0463b9a | 1584/144 | 1584/144 | 11 | 0.000% 0xc0541eb0 | 1024/16 | 1024/16 | 64 | 0.000% 0xc06a19ac | 896/128 | 896/128 | 7 | 0.000% 0xc05721c0 | 772/12 | 768/12 | 64 | 0.518% 0xc054d1e6 | 288/57 | 280/56 | 5 | 2.778% 0xc04b562e | 157/31 | 154/30 | 5 | 1.911% 0xc04b536f | 80/16 | 80/16 | 5 | 0.000% 0xc05855a0 | 64/64 | 36/36 | 1 | 43.750% ------------------------------------------------------------------------------ ------------------------------------------------------------------------------ Alloc Ptr | Total_alloc/Per | Total_req/Per | Hit | Fragmentation ------------------------------------------------------------------------------ 0xda884000 | 1052672/4096 | 1052672/4096 | 257 | 0.000% 0xda886000 | 262144/4096 | 262144/4096 | 64 | 0.000% 0xf60c7c00 | 16512/128 | 16512/128 | 129 | 0.000% 0xf59a4118 | 13120/40 | 13120/40 | 328 | 0.000% 0xdfd4b2c0 | 11264/88 | 11264/88 | 128 | 0.000% 0xf5274600 | 7680/256 | 7680/256 | 30 | 0.000% 0xe8395000 | 5948/594 | 5464/546 | 10 | 8.137% 0xe59c3c00 | 5748/479 | 5712/476 | 12 | 0.626% 0xf4cd1a80 | 3524/44 | 3520/44 | 80 | 0.114% 0xe5bd1600 | 2892/482 | 2856/476 | 6 | 1.245% ... | ... | ... | ... | ... ------------------------------------------------------------------------------ SUMMARY ======= Total bytes requested: 2333626 Total bytes allocated: 2353712 Total bytes wasted on internal fragmentation: 20086 Internal fragmentation: 0.853375% TODO: - show sym+offset in 'callsite' column - show cross node allocation stats - collect more useful stats? - ... Signed-off-by: Li Zefan Acked-by: Pekka Enberg Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Eduard - Gabriel Munteanu Cc: linux-mm@kvack.org LKML-Reference: <4B064AF5.9060208@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3f0666a..d7198c5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -444,6 +444,7 @@ BUILTIN_OBJS += builtin-timechart.o BUILTIN_OBJS += builtin-top.o BUILTIN_OBJS += builtin-trace.o BUILTIN_OBJS += builtin-probe.o +BUILTIN_OBJS += builtin-kmem.o PERFLIBS = $(LIB_FILE) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c new file mode 100644 index 0000000..f315b05 --- /dev/null +++ b/tools/perf/builtin-kmem.c @@ -0,0 +1,578 @@ +#include "builtin.h" +#include "perf.h" + +#include "util/util.h" +#include "util/cache.h" +#include "util/symbol.h" +#include "util/thread.h" +#include "util/header.h" + +#include "util/parse-options.h" +#include "util/trace-event.h" + +#include "util/debug.h" +#include "util/data_map.h" + +#include + +struct alloc_stat; +typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); + +static char const *input_name = "perf.data"; + +static struct perf_header *header; +static u64 sample_type; + +static int alloc_flag; +static int caller_flag; + +sort_fn_t alloc_sort_fn; +sort_fn_t caller_sort_fn; + +static int alloc_lines = -1; +static int caller_lines = -1; + +static char *cwd; +static int cwdlen; + +struct alloc_stat { + union { + struct { + char *name; + u64 call_site; + }; + u64 ptr; + }; + u64 bytes_req; + u64 bytes_alloc; + u32 hit; + + struct rb_node node; +}; + +static struct rb_root root_alloc_stat; +static struct rb_root root_alloc_sorted; +static struct rb_root root_caller_stat; +static struct rb_root root_caller_sorted; + +static unsigned long total_requested, total_allocated; + +struct raw_event_sample { + u32 size; + char data[0]; +}; + +static int +process_comm_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct thread *thread = threads__findnew(event->comm.pid); + + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->comm.comm, event->comm.pid); + + if (thread == NULL || + thread__set_comm(thread, event->comm.comm)) { + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); + return -1; + } + + return 0; +} + +static void insert_alloc_stat(unsigned long ptr, + int bytes_req, int bytes_alloc) +{ + struct rb_node **node = &root_alloc_stat.rb_node; + struct rb_node *parent = NULL; + struct alloc_stat *data = NULL; + + if (!alloc_flag) + return; + + while (*node) { + parent = *node; + data = rb_entry(*node, struct alloc_stat, node); + + if (ptr > data->ptr) + node = &(*node)->rb_right; + else if (ptr < data->ptr) + node = &(*node)->rb_left; + else + break; + } + + if (data && data->ptr == ptr) { + data->hit++; + data->bytes_req += bytes_req; + data->bytes_alloc += bytes_req; + } else { + data = malloc(sizeof(*data)); + data->ptr = ptr; + data->hit = 1; + data->bytes_req = bytes_req; + data->bytes_alloc = bytes_alloc; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &root_alloc_stat); + } +} + +static void insert_caller_stat(unsigned long call_site, + int bytes_req, int bytes_alloc) +{ + struct rb_node **node = &root_caller_stat.rb_node; + struct rb_node *parent = NULL; + struct alloc_stat *data = NULL; + + if (!caller_flag) + return; + + while (*node) { + parent = *node; + data = rb_entry(*node, struct alloc_stat, node); + + if (call_site > data->call_site) + node = &(*node)->rb_right; + else if (call_site < data->call_site) + node = &(*node)->rb_left; + else + break; + } + + if (data && data->call_site == call_site) { + data->hit++; + data->bytes_req += bytes_req; + data->bytes_alloc += bytes_req; + } else { + data = malloc(sizeof(*data)); + data->call_site = call_site; + data->hit = 1; + data->bytes_req = bytes_req; + data->bytes_alloc = bytes_alloc; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &root_caller_stat); + } +} + +static void process_alloc_event(struct raw_event_sample *raw, + struct event *event, + int cpu __used, + u64 timestamp __used, + struct thread *thread __used, + int node __used) +{ + unsigned long call_site; + unsigned long ptr; + int bytes_req; + int bytes_alloc; + + ptr = raw_field_value(event, "ptr", raw->data); + call_site = raw_field_value(event, "call_site", raw->data); + bytes_req = raw_field_value(event, "bytes_req", raw->data); + bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); + + insert_alloc_stat(ptr, bytes_req, bytes_alloc); + insert_caller_stat(call_site, bytes_req, bytes_alloc); + + total_requested += bytes_req; + total_allocated += bytes_alloc; +} + +static void process_free_event(struct raw_event_sample *raw __used, + struct event *event __used, + int cpu __used, + u64 timestamp __used, + struct thread *thread __used) +{ +} + +static void +process_raw_event(event_t *raw_event __used, void *more_data, + int cpu, u64 timestamp, struct thread *thread) +{ + struct raw_event_sample *raw = more_data; + struct event *event; + int type; + + type = trace_parse_common_type(raw->data); + event = trace_find_event(type); + + if (!strcmp(event->name, "kmalloc") || + !strcmp(event->name, "kmem_cache_alloc")) { + process_alloc_event(raw, event, cpu, timestamp, thread, 0); + return; + } + + if (!strcmp(event->name, "kmalloc_node") || + !strcmp(event->name, "kmem_cache_alloc_node")) { + process_alloc_event(raw, event, cpu, timestamp, thread, 1); + return; + } + + if (!strcmp(event->name, "kfree") || + !strcmp(event->name, "kmem_cache_free")) { + process_free_event(raw, event, cpu, timestamp, thread); + return; + } +} + +static int +process_sample_event(event_t *event, unsigned long offset, unsigned long head) +{ + u64 ip = event->ip.ip; + u64 timestamp = -1; + u32 cpu = -1; + u64 period = 1; + void *more_data = event->ip.__more_data; + struct thread *thread = threads__findnew(event->ip.pid); + + if (sample_type & PERF_SAMPLE_TIME) { + timestamp = *(u64 *)more_data; + more_data += sizeof(u64); + } + + if (sample_type & PERF_SAMPLE_CPU) { + cpu = *(u32 *)more_data; + more_data += sizeof(u32); + more_data += sizeof(u32); /* reserved */ + } + + if (sample_type & PERF_SAMPLE_PERIOD) { + period = *(u64 *)more_data; + more_data += sizeof(u64); + } + + dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.misc, + event->ip.pid, event->ip.tid, + (void *)(long)ip, + (long long)period); + + if (thread == NULL) { + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + + process_raw_event(event, more_data, cpu, timestamp, thread); + + return 0; +} + +static int sample_type_check(u64 type) +{ + sample_type = type; + + if (!(sample_type & PERF_SAMPLE_RAW)) { + fprintf(stderr, + "No trace sample to read. Did you call perf record " + "without -R?"); + return -1; + } + + return 0; +} + +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_comm_event = process_comm_event, + .sample_type_check = sample_type_check, +}; + +static int read_events(void) +{ + register_idle_thread(); + register_perf_file_handler(&file_handler); + + return mmap_dispatch_perf_file(&header, input_name, 0, 0, + &cwdlen, &cwd); +} + +static double fragmentation(unsigned long n_req, unsigned long n_alloc) +{ + if (n_alloc == 0) + return 0.0; + else + return 100.0 - (100.0 * n_req / n_alloc); +} + +static void __print_result(struct rb_root *root, int n_lines, int is_caller) +{ + struct rb_node *next; + + printf("\n ------------------------------------------------------------------------------\n"); + if (is_caller) + printf(" Callsite |"); + else + printf(" Alloc Ptr |"); + printf(" Total_alloc/Per | Total_req/Per | Hit | Fragmentation\n"); + printf(" ------------------------------------------------------------------------------\n"); + + next = rb_first(root); + + while (next && n_lines--) { + struct alloc_stat *data; + + data = rb_entry(next, struct alloc_stat, node); + + printf(" %-16p | %8llu/%-6lu | %8llu/%-6lu | %6lu | %8.3f%%\n", + is_caller ? (void *)(unsigned long)data->call_site : + (void *)(unsigned long)data->ptr, + (unsigned long long)data->bytes_alloc, + (unsigned long)data->bytes_alloc / data->hit, + (unsigned long long)data->bytes_req, + (unsigned long)data->bytes_req / data->hit, + (unsigned long)data->hit, + fragmentation(data->bytes_req, data->bytes_alloc)); + + next = rb_next(next); + } + + if (n_lines == -1) + printf(" ... | ... | ... | ... | ... \n"); + + printf(" ------------------------------------------------------------------------------\n"); +} + +static void print_summary(void) +{ + printf("\nSUMMARY\n=======\n"); + printf("Total bytes requested: %lu\n", total_requested); + printf("Total bytes allocated: %lu\n", total_allocated); + printf("Total bytes wasted on internal fragmentation: %lu\n", + total_allocated - total_requested); + printf("Internal fragmentation: %f%%\n", + fragmentation(total_requested, total_allocated)); +} + +static void print_result(void) +{ + if (caller_flag) + __print_result(&root_caller_sorted, caller_lines, 1); + if (alloc_flag) + __print_result(&root_alloc_sorted, alloc_lines, 0); + print_summary(); +} + +static void sort_insert(struct rb_root *root, struct alloc_stat *data, + sort_fn_t sort_fn) +{ + struct rb_node **new = &(root->rb_node); + struct rb_node *parent = NULL; + + while (*new) { + struct alloc_stat *this; + int cmp; + + this = rb_entry(*new, struct alloc_stat, node); + parent = *new; + + cmp = sort_fn(data, this); + + if (cmp > 0) + new = &((*new)->rb_left); + else + new = &((*new)->rb_right); + } + + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, + sort_fn_t sort_fn) +{ + struct rb_node *node; + struct alloc_stat *data; + + for (;;) { + node = rb_first(root); + if (!node) + break; + + rb_erase(node, root); + data = rb_entry(node, struct alloc_stat, node); + sort_insert(root_sorted, data, sort_fn); + } +} + +static void sort_result(void) +{ + __sort_result(&root_alloc_stat, &root_alloc_sorted, alloc_sort_fn); + __sort_result(&root_caller_stat, &root_caller_sorted, caller_sort_fn); +} + +static int __cmd_kmem(void) +{ + setup_pager(); + read_events(); + sort_result(); + print_result(); + + return 0; +} + +static const char * const kmem_usage[] = { + "perf kmem [] {record}", + NULL +}; + + +static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->ptr < r->ptr) + return -1; + else if (l->ptr > r->ptr) + return 1; + return 0; +} + +static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->call_site < r->call_site) + return -1; + else if (l->call_site > r->call_site) + return 1; + return 0; +} + +static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->bytes_alloc < r->bytes_alloc) + return -1; + else if (l->bytes_alloc > r->bytes_alloc) + return 1; + return 0; +} + +static int parse_sort_opt(const struct option *opt __used, + const char *arg, int unset __used) +{ + sort_fn_t sort_fn; + + if (!arg) + return -1; + + if (strcmp(arg, "ptr") == 0) + sort_fn = ptr_cmp; + else if (strcmp(arg, "call_site") == 0) + sort_fn = callsite_cmp; + else if (strcmp(arg, "bytes") == 0) + sort_fn = bytes_cmp; + else + return -1; + + if (caller_flag > alloc_flag) + caller_sort_fn = sort_fn; + else + alloc_sort_fn = sort_fn; + + return 0; +} + +static int parse_stat_opt(const struct option *opt __used, + const char *arg, int unset __used) +{ + if (!arg) + return -1; + + if (strcmp(arg, "alloc") == 0) + alloc_flag = (caller_flag + 1); + else if (strcmp(arg, "caller") == 0) + caller_flag = (alloc_flag + 1); + else + return -1; + return 0; +} + +static int parse_line_opt(const struct option *opt __used, + const char *arg, int unset __used) +{ + int lines; + + if (!arg) + return -1; + + lines = strtoul(arg, NULL, 10); + + if (caller_flag > alloc_flag) + caller_lines = lines; + else + alloc_lines = lines; + + return 0; +} + +static const struct option kmem_options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_CALLBACK(0, "stat", NULL, "|", + "stat selector, Pass 'alloc' or 'caller'.", + parse_stat_opt), + OPT_CALLBACK('s', "sort", NULL, "key", + "sort by key: ptr, call_site, hit, bytes", + parse_sort_opt), + OPT_CALLBACK('l', "line", NULL, "num", + "show n lins", + parse_line_opt), + OPT_END() +}; + +static const char *record_args[] = { + "record", + "-a", + "-R", + "-M", + "-f", + "-c", "1", + "-e", "kmem:kmalloc", + "-e", "kmem:kmalloc_node", + "-e", "kmem:kfree", + "-e", "kmem:kmem_cache_alloc", + "-e", "kmem:kmem_cache_alloc_node", + "-e", "kmem:kmem_cache_free", +}; + +static int __cmd_record(int argc, const char **argv) +{ + unsigned int rec_argc, i, j; + const char **rec_argv; + + rec_argc = ARRAY_SIZE(record_args) + argc - 1; + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + + for (i = 0; i < ARRAY_SIZE(record_args); i++) + rec_argv[i] = strdup(record_args[i]); + + for (j = 1; j < (unsigned int)argc; j++, i++) + rec_argv[i] = argv[j]; + + return cmd_record(i, rec_argv, NULL); +} + +int cmd_kmem(int argc, const char **argv, const char *prefix __used) +{ + symbol__init(0); + + argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); + + if (argc && !strncmp(argv[0], "rec", 3)) + return __cmd_record(argc, argv); + else if (argc) + usage_with_options(kmem_usage, kmem_options); + + if (!alloc_sort_fn) + alloc_sort_fn = bytes_cmp; + if (!caller_sort_fn) + caller_sort_fn = bytes_cmp; + + return __cmd_kmem(); +} + diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 9b02d85..a3d8bf6 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -28,5 +28,6 @@ extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_probe(int argc, const char **argv, const char *prefix); +extern int cmd_kmem(int argc, const char **argv, const char *prefix); #endif diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 89b82ac..cf64049 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -285,20 +285,21 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; static struct cmd_struct commands[] = { - { "help", cmd_help, 0 }, - { "list", cmd_list, 0 }, { "buildid-list", cmd_buildid_list, 0 }, - { "record", cmd_record, 0 }, - { "report", cmd_report, 0 }, - { "bench", cmd_bench, 0 }, - { "stat", cmd_stat, 0 }, - { "timechart", cmd_timechart, 0 }, - { "top", cmd_top, 0 }, - { "annotate", cmd_annotate, 0 }, - { "version", cmd_version, 0 }, - { "trace", cmd_trace, 0 }, - { "sched", cmd_sched, 0 }, - { "probe", cmd_probe, 0 }, + { "help", cmd_help, 0 }, + { "list", cmd_list, 0 }, + { "record", cmd_record, 0 }, + { "report", cmd_report, 0 }, + { "bench", cmd_bench, 0 }, + { "stat", cmd_stat, 0 }, + { "timechart", cmd_timechart, 0 }, + { "top", cmd_top, 0 }, + { "annotate", cmd_annotate, 0 }, + { "version", cmd_version, 0 }, + { "trace", cmd_trace, 0 }, + { "sched", cmd_sched, 0 }, + { "probe", cmd_probe, 0 }, + { "kmem", cmd_kmem, 0 }, }; unsigned int i; static const char ext[] = STRIP_EXTENSION; -- cgit v0.10.2 From 34769945f7cd9ab470413ffe64426e3ad069f49e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 Nov 2009 11:46:21 +0100 Subject: genirq: Fix spurious irq seqfile conversion single_open data argument must be PDE(inode)->data instead of NULL otherwise seq_file->private is always NULL and we always read the spurious data of irq 0. Reported-by: Alexey Dobriyan Signed-off-by: Thomas Gleixner diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 4e47f11..0832145 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -136,7 +136,7 @@ out: static int default_affinity_open(struct inode *inode, struct file *file) { - return single_open(file, default_affinity_show, NULL); + return single_open(file, default_affinity_show, PDE(inode)->data); } static const struct file_operations default_affinity_proc_fops = { -- cgit v0.10.2 From 80509e27e40d7554e576405ed9f5b7966c567112 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Nov 2009 12:13:08 -0500 Subject: x86: Fix insn decoder test typos Fix postest_verbose to posttest_verbose, and add posttest_64bit option for CONFIG_64BIT != y, since old command just passed '-' instead of '-n' when CONFIG_64BIT is not set. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston LKML-Reference: <20091120171307.6715.66099.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index 4688f90..c80b079 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -1,13 +1,19 @@ PHONY += posttest ifeq ($(KBUILD_VERBOSE),1) - postest_verbose = -v + posttest_verbose = -v else - postest_verbose = + posttest_verbose = +endif + +ifeq ($(CONFIG_64BIT),y) + posttest_64bit = -y +else + posttest_64bit = -n endif quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | awk -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len -$(CONFIG_64BIT) $(posttest_verbose) + cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) -- cgit v0.10.2 From 6f5f67267dc4faecd9cba63894de92ca92a608b8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 20 Nov 2009 12:13:14 -0500 Subject: x86: insn decoder test checks objdump version Check objdump version before using it for insn decoder build test, because some older objdump can't decode AVX code correctly. Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Stephen Rothwell Cc: Randy Dunlap Cc: Jim Keniston LKML-Reference: <20091120171314.6715.30390.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile index c80b079..f820826 100644 --- a/arch/x86/tools/Makefile +++ b/arch/x86/tools/Makefile @@ -12,8 +12,11 @@ else posttest_64bit = -n endif +distill_awk = $(srctree)/arch/x86/tools/distill.awk +chkobjdump = $(srctree)/arch/x86/tools/chkobjdump.awk + quiet_cmd_posttest = TEST $@ - cmd_posttest = $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(srctree)/arch/x86/tools/distill.awk | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) + cmd_posttest = ($(OBJDUMP) -v | $(AWK) -f $(chkobjdump)) || $(OBJDUMP) -d -j .text $(objtree)/vmlinux | $(AWK) -f $(distill_awk) | $(obj)/test_get_len $(posttest_64bit) $(posttest_verbose) posttest: $(obj)/test_get_len vmlinux $(call cmd,posttest) diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk new file mode 100644 index 0000000..0d13cd9 --- /dev/null +++ b/arch/x86/tools/chkobjdump.awk @@ -0,0 +1,23 @@ +# GNU objdump version checker +# +# Usage: +# objdump -v | awk -f chkobjdump.awk +BEGIN { + # objdump version 2.19 or later is OK for the test. + od_ver = 2; + od_sver = 19; +} + +/^GNU/ { + split($4, ver, "."); + if (ver[1] > od_ver || + (ver[1] == od_ver && ver[2] >= od_sver)) { + exit 1; + } else { + printf("Warning: objdump version %s is older than %d.%d\n", + $4, od_ver, od_sver); + print("Warning: Skipping posttest."); + # Logic is inverted, because we just skip test without error. + exit 0; + } +} -- cgit v0.10.2 From 6671cb1674e69e2aba3d610714bdd3e97a7b51ff Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Nov 2009 20:51:24 -0200 Subject: perf symbols: Remove unrelated actions from dso__load_kernel_sym MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It should just load kernel symbols, not load the list of modules. There are more stuff to move to other routines, but lets do it in several steps. End goal is to be able to defer symbol table loading till we find a hit for that map address range. So that the kernel & modules are handled just like all the other DSOs in the system. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258757489-5978-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 77d50a6..b6da147 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -33,6 +33,7 @@ static int input; static int full_paths; static int print_line; +static bool use_modules; static unsigned long page_size; static unsigned long mmap_window = 32; @@ -636,7 +637,7 @@ static int __cmd_annotate(void) exit(0); } - if (load_kernel(symbol_filter) < 0) { + if (load_kernel(symbol_filter, use_modules) < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; } @@ -742,7 +743,7 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), - OPT_BOOLEAN('m', "modules", &modules, + OPT_BOOLEAN('m', "modules", &use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('l', "print-line", &print_line, "print matching source lines (may be slow)"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1a806d5..0af4840 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -38,6 +38,7 @@ static char *dso_list_str, *comm_list_str, *sym_list_str, static struct strlist *dso_list, *comm_list, *sym_list; static int force; +static bool use_modules; static int full_paths; static int show_nr_samples; @@ -1023,7 +1024,7 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), - OPT_BOOLEAN('m', "modules", &modules, + OPT_BOOLEAN('m', "modules", &use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, "Show a column with the number of samples"), diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6d770ac..48cc108 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -953,8 +953,16 @@ static int parse_symbols(void) if (kernel == NULL) return -1; + if (dsos__load_modules() < 0) + pr_debug("Couldn't read the complete list of modules, " + "continuing...\n"); + + if (dsos__load_modules_sym(symbol_filter) < 0) + pr_warning("Failed to read module symbols, continuing...\n"); + if (dso__load_kernel_sym(kernel, symbol_filter, 1) <= 0) - return -1; + pr_debug("Couldn't read the complete list of kernel symbols, " + "continuing...\n"); if (dump_symtab) dsos__fprintf(stderr); diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index 5543e7d..a444a26 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -171,7 +171,7 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, goto out_delete; err = -ENOMEM; - if (load_kernel(NULL) < 0) { + if (load_kernel(NULL, 1) < 0) { pr_err("failed to load kernel symbols\n"); goto out_delete; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4d75e74..3b23c18 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1230,7 +1230,7 @@ failure: return -1; } -static int dsos__load_modules_sym(symbol_filter_t filter) +int dsos__load_modules_sym(symbol_filter_t filter) { struct utsname uts; char modules_path[PATH_MAX]; @@ -1352,33 +1352,18 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return err; } -int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, int use_modules) +int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, + int use_modules) { - int err = -1; + int err; kernel_map = map__new2(0, self); if (kernel_map == NULL) - goto out_delete_dso; + return -1; kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; - if (use_modules && dsos__load_modules() < 0) { - pr_warning("Failed to load list of modules in use! " - "Continuing...\n"); - use_modules = 0; - } - err = dso__load_vmlinux(self, kernel_map, self->name, filter); - if (err > 0 && use_modules) { - int syms = dsos__load_modules_sym(filter); - - if (syms < 0) - pr_warning("Failed to read module symbols!" - " Continuing...\n"); - else - err += syms; - } - if (err <= 0) err = kernel_maps__load_kallsyms(filter, use_modules); @@ -1404,17 +1389,12 @@ int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, int use_modul } return err; - -out_delete_dso: - dso__delete(self); - return -1; } LIST_HEAD(dsos); struct dso *vdso; const char *vmlinux_name = "vmlinux"; -int modules; static void dsos__add(struct dso *dso) { @@ -1488,14 +1468,26 @@ struct dso *dsos__load_kernel(void) return kernel; } -int load_kernel(symbol_filter_t filter) +int load_kernel(symbol_filter_t filter, bool use_modules) { struct dso *kernel = dsos__load_kernel(); if (kernel == NULL) return -1; - return dso__load_kernel_sym(kernel, filter, modules); + if (use_modules) { + if (dsos__load_modules() < 0) + pr_warning("Failed to load list of modules in use, " + "continuing...\n"); + else if (dsos__load_modules_sym(filter) < 0) + pr_warning("Failed to read module symbols, " + "continuing...\n"); + } + + if (dso__load_kernel_sym(kernel, filter, use_modules) < 0) + pr_warning("Failed to read kernel symbols, continuing...\n"); + + return 0; } void symbol__init(unsigned int priv_size) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f0593a6..3d9d346 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -78,9 +78,11 @@ void dso__delete(struct dso *self); struct symbol *dso__find_symbol(struct dso *self, u64 ip); int dsos__load_modules(void); +int dsos__load_modules_sym(symbol_filter_t filter); struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); -int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, int modules); +int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, + int use_modules); void dsos__fprintf(FILE *fp); size_t dsos__fprintf_buildid(FILE *fp); @@ -95,7 +97,7 @@ bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); struct dso *dsos__load_kernel(void); -int load_kernel(symbol_filter_t filter); +int load_kernel(symbol_filter_t filter, bool use_modules); void symbol__init(unsigned int priv_size); @@ -103,5 +105,4 @@ extern struct list_head dsos; extern struct map *kernel_map; extern struct dso *vdso; extern const char *vmlinux_name; -extern int modules; #endif /* __PERF_SYMBOL */ -- cgit v0.10.2 From fd7a346ea292074e9f6cdb5232a57c56bf98fdc9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Nov 2009 20:51:25 -0200 Subject: perf symbols: Filename__read_build_id should look at .notes section too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the kernel we have more than one notes section, so the linker script combines all and puts them into a ".notes" combined section. So we need to look at both sections and also traverse them looking at multiple GElf_Nhdr entries till we find the one we want, with the build_id. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258757489-5978-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 3b23c18..d220308 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -899,13 +899,19 @@ bool dsos__read_build_ids(void) return have_build_id; } +/* + * Align offset to 4 bytes as needed for note name and descriptor data. + */ +#define NOTE_ALIGN(n) (((n) + 3) & -4U) + int filename__read_build_id(const char *filename, void *bf, size_t size) { int fd, err = -1; GElf_Ehdr ehdr; GElf_Shdr shdr; - Elf_Data *build_id_data; + Elf_Data *data; Elf_Scn *sec; + void *ptr; Elf *elf; if (size < BUILD_ID_SIZE) @@ -928,14 +934,37 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL); - if (sec == NULL) - goto out_elf_end; + if (sec == NULL) { + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".notes", NULL); + if (sec == NULL) + goto out_elf_end; + } - build_id_data = elf_getdata(sec, NULL); - if (build_id_data == NULL) + data = elf_getdata(sec, NULL); + if (data == NULL) goto out_elf_end; - memcpy(bf, build_id_data->d_buf + 16, BUILD_ID_SIZE); - err = BUILD_ID_SIZE; + + ptr = data->d_buf; + while (ptr < (data->d_buf + data->d_size)) { + GElf_Nhdr *nhdr = ptr; + int namesz = NOTE_ALIGN(nhdr->n_namesz), + descsz = NOTE_ALIGN(nhdr->n_descsz); + const char *name; + + ptr += sizeof(*nhdr); + name = ptr; + ptr += namesz; + if (nhdr->n_type == NT_GNU_BUILD_ID && + nhdr->n_namesz == sizeof("GNU")) { + if (memcmp(name, "GNU", sizeof("GNU")) == 0) { + memcpy(bf, ptr, BUILD_ID_SIZE); + err = BUILD_ID_SIZE; + break; + } + } + ptr += descsz; + } out_elf_end: elf_end(elf); out_close: @@ -963,8 +992,8 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) break; - namesz = (nhdr.n_namesz + 3) & -4U; - descsz = (nhdr.n_descsz + 3) & -4U; + namesz = NOTE_ALIGN(nhdr.n_namesz); + descsz = NOTE_ALIGN(nhdr.n_descsz); if (nhdr.n_type == NT_GNU_BUILD_ID && nhdr.n_namesz == sizeof("GNU")) { if (read(fd, bf, namesz) != namesz) -- cgit v0.10.2 From 78075caad99dc36ec6ef5826b7a5273ea14295fc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Nov 2009 20:51:26 -0200 Subject: perf symbols: Introduce dso__build_id_equal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Will be used in more places. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258757489-5978-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index d220308..c324bdf 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -884,6 +884,11 @@ out_close: return err; } +static bool dso__build_id_equal(const struct dso *self, u8 *build_id) +{ + return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0; +} + bool dsos__read_build_ids(void) { bool have_build_id = false; @@ -1099,8 +1104,7 @@ more: sizeof(build_id)) < 0) goto more; compare_build_id: - if (memcmp(build_id, self->build_id, - sizeof(self->build_id)) != 0) + if (!dso__build_id_equal(self, build_id)) goto more; } -- cgit v0.10.2 From c338aee853db197e1855b393e6d6cc667784537f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Nov 2009 20:51:27 -0200 Subject: perf symbols: Do lazy symtab loading for the kernel & modules too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like we do with the other DSOs. This also simplifies the kernel_maps setup process, now all that the tools need to do is to call kernel_maps__init and the maps for the modules and kernel will be created, then, later, when kernel_maps__find_symbol() is used, it will also call maps__find_symbol that already checks if the symtab was loaded, loading it if needed. Now if one does 'perf top --hide_kernel_symbols' we won't pay the price of loading the (many) symbols in /proc/kallsyms or vmlinux. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258757489-5978-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index b6da147..2031527 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -157,7 +157,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (event->header.misc & PERF_RECORD_MISC_KERNEL) { level = 'k'; - sym = kernel_maps__find_symbol(ip, &map); + sym = kernel_maps__find_symbol(ip, &map, symbol_filter); dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); } else if (event->header.misc & PERF_RECORD_MISC_USER) { @@ -637,9 +637,9 @@ static int __cmd_annotate(void) exit(0); } - if (load_kernel(symbol_filter, use_modules) < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; + if (kernel_maps__init(use_modules) < 0) { + pr_err("failed to create kernel maps for symbol resolution\b"); + return -1; } remap: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0af4840..0d39e80 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -449,7 +449,7 @@ got_map: * trick of looking in the whole kernel symbol list. */ if ((long long)ip < 0) - return kernel_maps__find_symbol(ip, mapp); + return kernel_maps__find_symbol(ip, mapp, NULL); } dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); @@ -496,7 +496,7 @@ static struct symbol **resolve_callchain(struct thread *thread, struct map *map, case PERF_CONTEXT_HV: break; case PERF_CONTEXT_KERNEL: - sym = kernel_maps__find_symbol(ip, &map); + sym = kernel_maps__find_symbol(ip, &map, NULL); break; default: sym = resolve_symbol(thread, &map, &ip); @@ -716,7 +716,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (cpumode == PERF_RECORD_MISC_KERNEL) { level = 'k'; - sym = kernel_maps__find_symbol(ip, &map); + sym = kernel_maps__find_symbol(ip, &map, NULL); dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); } else if (cpumode == PERF_RECORD_MISC_USER) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 48cc108..ea49c2e 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -830,6 +830,8 @@ static void handle_keypress(int c) case 'q': case 'Q': printf("exiting.\n"); + if (dump_symtab) + dsos__fprintf(stderr); exit(0); case 's': prompt_symbol(&sym_filter_entry, "Enter details symbol"); @@ -946,30 +948,6 @@ static int symbol_filter(struct map *map, struct symbol *sym) return 0; } -static int parse_symbols(void) -{ - struct dso *kernel = dsos__load_kernel(); - - if (kernel == NULL) - return -1; - - if (dsos__load_modules() < 0) - pr_debug("Couldn't read the complete list of modules, " - "continuing...\n"); - - if (dsos__load_modules_sym(symbol_filter) < 0) - pr_warning("Failed to read module symbols, continuing...\n"); - - if (dso__load_kernel_sym(kernel, symbol_filter, 1) <= 0) - pr_debug("Couldn't read the complete list of kernel symbols, " - "continuing...\n"); - - if (dump_symtab) - dsos__fprintf(stderr); - - return 0; -} - static void event__process_sample(const event_t *self, int counter) { u64 ip = self->ip.ip; @@ -1012,7 +990,7 @@ static void event__process_sample(const event_t *self, int counter) if (hide_kernel_symbols) return; - sym = kernel_maps__find_symbol(ip, &map); + sym = kernel_maps__find_symbol(ip, &map, symbol_filter); if (sym == NULL) return; break; @@ -1339,7 +1317,7 @@ static const struct option options[] = { int cmd_top(int argc, const char **argv, const char *prefix __used) { - int counter; + int counter, err; page_size = sysconf(_SC_PAGE_SIZE); @@ -1363,10 +1341,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (delay_secs < 1) delay_secs = 1; - parse_symbols(); + err = kernel_maps__init(true); + if (err < 0) + return err; parse_source(sym_filter_entry); - /* * User specified count overrides default frequency. */ diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index a444a26..e7b6c2b 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -171,8 +171,8 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, goto out_delete; err = -ENOMEM; - if (load_kernel(NULL, 1) < 0) { - pr_err("failed to load kernel symbols\n"); + if (kernel_maps__init(true) < 0) { + pr_err("failed to setup the kernel maps to resolve symbols\n"); goto out_delete; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 34c6fcb..f1e3926 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -115,10 +115,13 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); void map__init(struct map *self, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); +void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); struct symbol *map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter); +void map__fixup_start(struct map *self); +void map__fixup_end(struct map *self); int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); void event__synthesize_threads(int (*process)(event_t *event)); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d5c81eb..ac3410b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -253,11 +253,11 @@ static int perf_header__adds_write(struct perf_header *self, int fd) buildid_sec = &feat_sec[idx++]; - dsos__load_kernel(); /* - * Read the list of loaded modules with its build_ids + * Read the kernel buildid nad the list of loaded modules with + * its build_ids: */ - dsos__load_modules(); + kernel_maps__init(true); /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 94ca950..0941232 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -75,6 +75,29 @@ out_delete: return NULL; } +void map__delete(struct map *self) +{ + free(self); +} + +void map__fixup_start(struct map *self) +{ + struct rb_node *nd = rb_first(&self->dso->syms); + if (nd != NULL) { + struct symbol *sym = rb_entry(nd, struct symbol, rb_node); + self->start = sym->start; + } +} + +void map__fixup_end(struct map *self) +{ + struct rb_node *nd = rb_last(&self->dso->syms); + if (nd != NULL) { + struct symbol *sym = rb_entry(nd, struct symbol, rb_node); + self->end = sym->end; + } +} + #define DSO__DELETED "(deleted)" struct symbol * diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c324bdf..cb086cb 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -27,6 +27,8 @@ static void dsos__add(struct dso *dso); static struct dso *dsos__find(const char *name); static struct map *map__new2(u64 start, struct dso *dso); static void kernel_maps__insert(struct map *map); +static int dso__load_kernel_sym(struct dso *self, struct map *map, + symbol_filter_t filter); unsigned int symbol__priv_size; static struct rb_root kernel_maps; @@ -69,12 +71,6 @@ static void kernel_maps__fixup_end(void) curr = rb_entry(nd, struct map, rb_node); prev->end = curr->start - 1; } - - nd = rb_last(&curr->dso->syms); - if (nd) { - struct symbol *sym = rb_entry(nd, struct symbol, rb_node); - curr->end = sym->end; - } } static struct symbol *symbol__new(u64 start, u64 len, const char *name) @@ -324,7 +320,7 @@ out_failure: * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) +static int kernel_maps__split_kallsyms(symbol_filter_t filter) { struct map *map = kernel_map; struct symbol *pos; @@ -340,9 +336,6 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) module = strchr(pos->name, '\t'); if (module) { - if (!use_modules) - goto delete_symbol; - *module++ = '\0'; if (strcmp(map->dso->name, module)) { @@ -382,7 +375,6 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) } if (filter && filter(map, pos)) { -delete_symbol: rb_erase(&pos->rb_node, &kernel_map->dso->syms); symbol__delete(pos); } else { @@ -398,17 +390,18 @@ delete_symbol: } -static int kernel_maps__load_kallsyms(symbol_filter_t filter, int use_modules) +static int kernel_maps__load_kallsyms(symbol_filter_t filter) { if (kernel_maps__load_all_kallsyms()) return -1; dso__fixup_sym_end(kernel_map->dso); + kernel_map->dso->origin = DSO__ORIG_KERNEL; - return kernel_maps__split_kallsyms(filter, use_modules); + return kernel_maps__split_kallsyms(filter); } -static size_t kernel_maps__fprintf(FILE *fp) +size_t kernel_maps__fprintf(FILE *fp) { size_t printed = fprintf(fp, "Kernel maps:\n"); struct rb_node *nd; @@ -1042,13 +1035,17 @@ char dso__symtab_origin(const struct dso *self) int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) { int size = PATH_MAX; - char *name = malloc(size); + char *name; u8 build_id[BUILD_ID_SIZE]; int ret = -1; int fd; self->loaded = 1; + if (self->kernel) + return dso__load_kernel_sym(self, map, filter); + + name = malloc(size); if (!name) return -1; @@ -1139,7 +1136,8 @@ static void kernel_maps__insert(struct map *map) maps__insert(&kernel_maps, map); } -struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp) +struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp, + symbol_filter_t filter) { struct map *map = maps__find(&kernel_maps, ip); @@ -1148,7 +1146,7 @@ struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp) if (map) { ip = map->map_ip(map, ip); - return map->dso->find_symbol(map->dso, ip); + return map__find_symbol(map, ip, filter); } return NULL; @@ -1168,28 +1166,9 @@ struct map *kernel_maps__find_by_dso_name(const char *name) return NULL; } -static int dso__load_module_sym(struct dso *self, struct map *map, - symbol_filter_t filter) -{ - int err = 0, fd = open(self->long_name, O_RDONLY); - - self->loaded = 1; - - if (fd < 0) { - pr_err("%s: cannot open %s\n", __func__, self->long_name); - return err; - } - - err = dso__load_sym(self, map, self->long_name, fd, filter, 0, 1); - close(fd); - - return err; -} - -static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) +static int dsos__set_modules_path_dir(char *dirname) { struct dirent *dent; - int nr_symbols = 0, err; DIR *dir = opendir(dirname); if (!dir) { @@ -1207,14 +1186,12 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) snprintf(path, sizeof(path), "%s/%s", dirname, dent->d_name); - err = dsos__load_modules_sym_dir(path, filter); - if (err < 0) + if (dsos__set_modules_path_dir(path) < 0) goto failure; } else { char *dot = strrchr(dent->d_name, '.'), dso_name[PATH_MAX]; struct map *map; - struct rb_node *last; char *long_name; if (dot == NULL || strcmp(dot, ".ko")) @@ -1234,36 +1211,16 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) if (long_name == NULL) goto failure; dso__set_long_name(map->dso, long_name); - dso__set_basename(map->dso); - - err = dso__load_module_sym(map->dso, map, filter); - if (err < 0) - goto failure; - last = rb_last(&map->dso->syms); - if (last) { - struct symbol *sym; - /* - * We do this here as well, even having the - * symbol size found in the symtab because - * misannotated ASM symbols may have the size - * set to zero. - */ - dso__fixup_sym_end(map->dso); - - sym = rb_entry(last, struct symbol, rb_node); - map->end = map->start + sym->end; - } } - nr_symbols += err; } - return nr_symbols; + return 0; failure: closedir(dir); return -1; } -int dsos__load_modules_sym(symbol_filter_t filter) +static int dsos__set_modules_path(void) { struct utsname uts; char modules_path[PATH_MAX]; @@ -1274,7 +1231,7 @@ int dsos__load_modules_sym(symbol_filter_t filter) snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", uts.release); - return dsos__load_modules_sym_dir(modules_path, filter); + return dsos__set_modules_path_dir(modules_path); } /* @@ -1296,7 +1253,7 @@ static struct map *map__new2(u64 start, struct dso *dso) return self; } -int dsos__load_modules(void) +static int kernel_maps__create_module_maps(void) { char *line = NULL; size_t n; @@ -1360,7 +1317,13 @@ int dsos__load_modules(void) free(line); fclose(file); - return 0; + /* + * Now that we have all sorted out, just set the ->end of all + * maps: + */ + kernel_maps__fixup_end(); + + return dsos__set_modules_path(); out_delete_line: free(line); @@ -1385,40 +1348,17 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return err; } -int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, - int use_modules) +static int dso__load_kernel_sym(struct dso *self, struct map *map, + symbol_filter_t filter) { - int err; + int err = dso__load_vmlinux(self, map, self->name, filter); - kernel_map = map__new2(0, self); - if (kernel_map == NULL) - return -1; - - kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; - - err = dso__load_vmlinux(self, kernel_map, self->name, filter); if (err <= 0) - err = kernel_maps__load_kallsyms(filter, use_modules); + err = kernel_maps__load_kallsyms(filter); if (err > 0) { - struct rb_node *node = rb_first(&self->syms); - struct symbol *sym = rb_entry(node, struct symbol, rb_node); - - kernel_map->start = sym->start; - node = rb_last(&self->syms); - sym = rb_entry(node, struct symbol, rb_node); - kernel_map->end = sym->end; - - self->origin = DSO__ORIG_KERNEL; - kernel_maps__insert(kernel_map); - /* - * Now that we have all sorted out, just set the ->end of all - * maps: - */ - kernel_maps__fixup_end(); - - if (verbose) - kernel_maps__fprintf(stderr); + map__fixup_start(map); + map__fixup_end(map); } return err; @@ -1479,46 +1419,51 @@ size_t dsos__fprintf_buildid(FILE *fp) return ret; } -struct dso *dsos__load_kernel(void) +static int kernel_maps__create_kernel_map(void) { struct dso *kernel = dso__new(vmlinux_name); if (kernel == NULL) - return NULL; + return -1; + + kernel_map = map__new2(0, kernel); + if (kernel_map == NULL) + goto out_delete_kernel_dso; + + kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; kernel->short_name = "[kernel]"; + kernel->kernel = 1; vdso = dso__new("[vdso]"); - if (!vdso) - return NULL; + if (vdso == NULL) + goto out_delete_kernel_map; if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, sizeof(kernel->build_id)) == 0) kernel->has_build_id = true; + kernel_maps__insert(kernel_map); dsos__add(kernel); dsos__add(vdso); - return kernel; + return 0; + +out_delete_kernel_map: + map__delete(kernel_map); + kernel_map = NULL; +out_delete_kernel_dso: + dso__delete(kernel); + return -1; } -int load_kernel(symbol_filter_t filter, bool use_modules) +int kernel_maps__init(bool use_modules) { - struct dso *kernel = dsos__load_kernel(); - - if (kernel == NULL) + if (kernel_maps__create_kernel_map() < 0) return -1; - if (use_modules) { - if (dsos__load_modules() < 0) - pr_warning("Failed to load list of modules in use, " - "continuing...\n"); - else if (dsos__load_modules_sym(filter) < 0) - pr_warning("Failed to read module symbols, " - "continuing...\n"); - } - - if (dso__load_kernel_sym(kernel, filter, use_modules) < 0) - pr_warning("Failed to read kernel symbols, continuing...\n"); + if (use_modules && kernel_maps__create_module_maps() < 0) + pr_warning("Failed to load list of modules in use, " + "continuing...\n"); return 0; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 3d9d346..7a12904 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -64,6 +64,7 @@ struct dso { u8 slen_calculated:1; u8 loaded:1; u8 has_build_id:1; + u8 kernel:1; unsigned char origin; u8 build_id[BUILD_ID_SIZE]; u16 long_name_len; @@ -77,12 +78,8 @@ void dso__delete(struct dso *self); struct symbol *dso__find_symbol(struct dso *self, u64 ip); -int dsos__load_modules(void); -int dsos__load_modules_sym(symbol_filter_t filter); struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); -int dso__load_kernel_sym(struct dso *self, symbol_filter_t filter, - int use_modules); void dsos__fprintf(FILE *fp); size_t dsos__fprintf_buildid(FILE *fp); @@ -96,8 +93,8 @@ int sysfs__read_build_id(const char *filename, void *bf, size_t size); bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); -struct dso *dsos__load_kernel(void); -int load_kernel(symbol_filter_t filter, bool use_modules); +int kernel_maps__init(bool use_modules); +size_t kernel_maps__fprintf(FILE *fp); void symbol__init(unsigned int priv_size); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 53addd7..e4b8d43 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -26,7 +26,8 @@ size_t threads__fprintf(FILE *fp); void maps__insert(struct rb_root *maps, struct map *map); struct map *maps__find(struct rb_root *maps, u64 ip); -struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp); +struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp, + symbol_filter_t filter); struct map *kernel_maps__find_by_dso_name(const char *name); static inline struct map *thread__find_map(struct thread *self, u64 ip) -- cgit v0.10.2 From fbd733b815a5a57d7eb0d904edc49d18fd12df5c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Nov 2009 20:51:28 -0200 Subject: perf symbols: Check vmlinux buildid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E.g.: [root@doppio linux-2.6-tip]# perf top -v --vmlinux ../build/tip/vmlinux > /dev/null build_id in vmlinux is e96699725a47413a50c231864a8e7a8ced40a31b while expected is 18e7cc53db62a7d35e9d6f6c9ddc23017d38ee9a, ignoring it I.e. perf top was told to use a vmlinux file that is not the one currently running on the machine, it ignores it and falls back to using /proc/kallsyms. This solves many, at first, mysterious results when people have a stale vmlinux file while keeping the default of trying to use the vmlinux file in the current directory in things like 'perf annotate' where the DWARF info is required and thus we can't use just /proc/kallsyms. Modules buildids are already being checked as of the previous changeset in this series, because we are using the default dso__load routine, that will look at a series of places looking for the best file with a matching buildid, starting in the -debuginfo directories. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258757489-5978-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cb086cb..9cf6dbc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1334,13 +1334,37 @@ out_failure: static int dso__load_vmlinux(struct dso *self, struct map *map, const char *vmlinux, symbol_filter_t filter) { - int err, fd = open(vmlinux, O_RDONLY); + int err = -1, fd; - self->loaded = 1; + if (self->has_build_id) { + u8 build_id[BUILD_ID_SIZE]; + + if (filename__read_build_id(vmlinux, build_id, + sizeof(build_id)) < 0) { + pr_debug("No build_id in %s, ignoring it\n", vmlinux); + return -1; + } + if (!dso__build_id_equal(self, build_id)) { + char expected_build_id[BUILD_ID_SIZE * 2 + 1], + vmlinux_build_id[BUILD_ID_SIZE * 2 + 1]; + + build_id__sprintf(self->build_id, + sizeof(self->build_id), + expected_build_id); + build_id__sprintf(build_id, sizeof(build_id), + vmlinux_build_id); + pr_debug("build_id in %s is %s while expected is %s, " + "ignoring it\n", vmlinux, vmlinux_build_id, + expected_build_id); + return -1; + } + } + fd = open(vmlinux, O_RDONLY); if (fd < 0) return -1; + self->loaded = 1; err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0); close(fd); -- cgit v0.10.2 From ef6ae724253429ac70d81e65d052f6a346d330bd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 20 Nov 2009 20:51:29 -0200 Subject: perf symbols: Change the kernel DSO name if it comes from kallsyms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that the user have a clearer indication about the source of the symbols, as we only state buildid mismatches in verbose mode, because 'perf top' would overwrite such warning anyway. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258757489-5978-6-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9cf6dbc..48f87f0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -108,6 +108,8 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp) static void dso__set_long_name(struct dso *self, char *name) { + if (name == NULL) + return; self->long_name = name; self->long_name_len = strlen(name); } @@ -1377,8 +1379,11 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, { int err = dso__load_vmlinux(self, map, self->name, filter); - if (err <= 0) + if (err <= 0) { err = kernel_maps__load_kallsyms(filter); + if (err > 0) + dso__set_long_name(self, strdup("[kernel.kallsyms]")); + } if (err > 0) { map__fixup_start(map); -- cgit v0.10.2 From 453f19eea7dbad837425e9b07d84568d14898794 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:43 +0100 Subject: perf: Allow for custom overflow handlers in-kernel perf users might wish to have custom actions on the sample interrupt. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.222339539@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b5cdac0..a430ac3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -567,6 +567,8 @@ struct perf_pending_entry { typedef void (*perf_callback_t)(struct perf_event *, void *); +struct perf_sample_data; + /** * struct perf_event - performance event kernel representation: */ @@ -658,6 +660,10 @@ struct perf_event { struct pid_namespace *ns; u64 id; + void (*overflow_handler)(struct perf_event *event, + int nmi, struct perf_sample_data *data, + struct pt_regs *regs); + #ifdef CONFIG_EVENT_PROFILE struct event_filter *filter; #endif diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3852e26..1dfb6cc 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3710,7 +3710,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, perf_event_disable(event); } - perf_event_output(event, nmi, data, regs); + if (event->overflow_handler) + event->overflow_handler(event, nmi, data, regs); + else + perf_event_output(event, nmi, data, regs); + return ret; } @@ -4836,6 +4840,8 @@ inherit_event(struct perf_event *parent_event, if (parent_event->attr.freq) child_event->hw.sample_period = parent_event->hw.sample_period; + child_event->overflow_handler = parent_event->overflow_handler; + /* * Link it up in the child's context: */ -- cgit v0.10.2 From 0cff784ae41cc125368ae77f1c01328ae2fdc6b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:44 +0100 Subject: perf: Optimize some swcounter attr.sample_period==1 paths Avoid the rather expensive perf_swevent_set_period() if we know we have to sample every single event anyway. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.299508332@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 1dfb6cc..8e55b44 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3759,16 +3759,16 @@ again: return nr; } -static void perf_swevent_overflow(struct perf_event *event, +static void perf_swevent_overflow(struct perf_event *event, u64 overflow, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; int throttle = 0; - u64 overflow; data->period = event->hw.last_period; - overflow = perf_swevent_set_period(event); + if (!overflow) + overflow = perf_swevent_set_period(event); if (hwc->interrupts == MAX_INTERRUPTS) return; @@ -3801,14 +3801,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, atomic64_add(nr, &event->count); + if (!regs) + return; + if (!hwc->sample_period) return; - if (!regs) + if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) + return perf_swevent_overflow(event, 1, nmi, data, regs); + + if (atomic64_add_negative(nr, &hwc->period_left)) return; - if (!atomic64_add_negative(nr, &hwc->period_left)) - perf_swevent_overflow(event, nmi, data, regs); + perf_swevent_overflow(event, 0, nmi, data, regs); } static int perf_swevent_is_counting(struct perf_event *event) -- cgit v0.10.2 From 81520183878a8813c71c9372de28bb70913ba549 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:45 +0100 Subject: perf: Optimize perf_swevent_ctx_event() Remove a rcu_read_{,un}lock() pair and a few conditionals. We can remove the rcu_read_lock() by increasing the scope of one in the calling function. We can do away with the system_state check if the machine still boots after this patch (seems to be the case). We can do away with the list_empty() check because the bare list_for_each_entry_rcu() reduces to that now that we've removed everything else. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.378188589@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8e55b44..cda17ac 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3886,15 +3886,10 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, { struct perf_event *event; - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) perf_swevent_add(event, nr, nmi, data, regs); } - rcu_read_unlock(); } static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) @@ -3926,9 +3921,9 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, (*recursion)++; barrier(); + rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, nr, nmi, data, regs); - rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. -- cgit v0.10.2 From d6ff86cfb50a72df820e7e839836d55d245306fb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:46 +0100 Subject: perf: Optimize perf_event_task_ctx() Remove a rcu_read_{,un}lock() pair and a few conditionals. We can remove the rcu_read_lock() by increasing the scope of one in the calling function. We can do away with the system_state check if the machine still boots after this patch (seems to be the case). We can do away with the list_empty() check because the bare list_for_each_entry_rcu() reduces to that now that we've removed everything else. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.452227115@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index cda17ac..2afb305 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3267,15 +3267,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, { struct perf_event *event; - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_task_match(event)) perf_event_task_output(event, task_event); } - rcu_read_unlock(); } static void perf_event_task_event(struct perf_task_event *task_event) @@ -3283,11 +3278,11 @@ static void perf_event_task_event(struct perf_task_event *task_event) struct perf_cpu_context *cpuctx; struct perf_event_context *ctx = task_event->task_ctx; + rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_task_ctx(&cpuctx->ctx, task_event); put_cpu_var(perf_cpu_context); - rcu_read_lock(); if (!ctx) ctx = rcu_dereference(task_event->task->perf_event_ctxp); if (ctx) -- cgit v0.10.2 From f6595f3a9680c86b6332f881a7ae2cbbcfdc8619 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:47 +0100 Subject: perf: Optimize perf_event_comm_ctx() Remove a rcu_read_{,un}lock() pair and a few conditionals. We can remove the rcu_read_lock() by increasing the scope of one in the calling function. We can do away with the system_state check if the machine still boots after this patch (seems to be the case). We can do away with the list_empty() check because the bare list_for_each_entry_rcu() reduces to that now that we've removed everything else. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.527608793@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2afb305..4deefaa 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3374,15 +3374,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx, { struct perf_event *event; - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_comm_match(event)) perf_event_comm_output(event, comm_event); } - rcu_read_unlock(); } static void perf_event_comm_event(struct perf_comm_event *comm_event) @@ -3401,11 +3396,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; + rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_comm_ctx(&cpuctx->ctx, comm_event); put_cpu_var(perf_cpu_context); - rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. -- cgit v0.10.2 From f6d9dd237da400effb265f3554c64413f8a3e7b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:48 +0100 Subject: perf: Optimize perf_event_mmap_ctx() Remove a rcu_read_{,un}lock() pair and a few conditionals. We can remove the rcu_read_lock() by increasing the scope of one in the calling function. We can do away with the system_state check if the machine still boots after this patch (seems to be the case). We can do away with the list_empty() check because the bare list_for_each_entry_rcu() reduces to that now that we've removed everything else. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.606459548@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 4deefaa..68fbf4f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3493,15 +3493,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx, { struct perf_event *event; - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_mmap_match(event, mmap_event)) perf_event_mmap_output(event, mmap_event); } - rcu_read_unlock(); } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) @@ -3557,11 +3552,11 @@ got_name: mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; + rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); put_cpu_var(perf_cpu_context); - rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. -- cgit v0.10.2 From abf4868b8548cae18d4fe8bbfb4e207443be01be Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:49 +0100 Subject: perf: Fix PERF_FORMAT_GROUP scale info As Corey reported, the total_enabled and total_running times could occasionally be 0, even though there were events counted. It turns out this is because we record the times before reading the counter while the latter updates the times. This patch corrects that. While looking at this code I found that there is a lot of locking iffyness around, the following patches correct most of that. Reported-by: Corey Ashford Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.685559857@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 68fbf4f..9a18ff2 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1784,30 +1784,15 @@ u64 perf_event_read_value(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_read_value); -static int perf_event_read_entry(struct perf_event *event, - u64 read_format, char __user *buf) -{ - int n = 0, count = 0; - u64 values[2]; - - values[n++] = perf_event_read_value(event); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_event_id(event); - - count = n * sizeof(u64); - - if (copy_to_user(buf, values, count)) - return -EFAULT; - - return count; -} - static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *sub; - int n = 0, size = 0, err = -EFAULT; - u64 values[3]; + int n = 0, size = 0, ret = 0; + u64 values[5]; + u64 count; + + count = perf_event_read_value(leader); values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { @@ -1818,28 +1803,33 @@ static int perf_event_read_group(struct perf_event *event, values[n++] = leader->total_time_running + atomic64_read(&leader->child_total_time_running); } + values[n++] = count; + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(leader); size = n * sizeof(u64); if (copy_to_user(buf, values, size)) return -EFAULT; - err = perf_event_read_entry(leader, read_format, buf + size); - if (err < 0) - return err; - - size += err; + ret += size; list_for_each_entry(sub, &leader->sibling_list, group_entry) { - err = perf_event_read_entry(sub, read_format, - buf + size); - if (err < 0) - return err; + n = 0; - size += err; + values[n++] = perf_event_read_value(sub); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(sub); + + size = n * sizeof(u64); + + if (copy_to_user(buf + size, values, size)) + return -EFAULT; + + ret += size; } - return size; + return ret; } static int perf_event_read_one(struct perf_event *event, -- cgit v0.10.2 From 02ffdbc866c8b1c8644601e9aa6155700eed4c91 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:50 +0100 Subject: perf: Optimize perf_event_task_sched_out Remove an update_context_time() call from the perf_event_task_sched_out() path and into the branch its needed. The call was both superfluous, because __perf_event_sched_out() already does it, and wrong, because it was done without holding ctx->lock. Place it in perf_event_sync_stat(), which is the only place it is needed and which does already hold ctx->lock. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.779516394@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9a18ff2..65f4dab 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1120,6 +1120,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, if (!ctx->nr_stat) return; + update_context_time(ctx); + event = list_first_entry(&ctx->event_list, struct perf_event, event_entry); @@ -1163,8 +1165,6 @@ void perf_event_task_sched_out(struct task_struct *task, if (likely(!ctx || !cpuctx->task_ctx)) return; - update_context_time(ctx); - rcu_read_lock(); parent = rcu_dereference(ctx->parent_ctx); next_ctx = next->perf_event_ctxp; -- cgit v0.10.2 From f6f83785222b0ee037f7be90731f62a649292b5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:51 +0100 Subject: perf: Optimize __perf_event_read() Both callers actually have IRQs disabled, no need doing so again. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.863685796@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 65f4dab..e66f6c4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1517,7 +1517,6 @@ static void __perf_event_read(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; - unsigned long flags; /* * If this is a task context, we need to check whether it is @@ -1529,12 +1528,10 @@ static void __perf_event_read(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - local_irq_save(flags); if (ctx->is_active) update_context_time(ctx); event->pmu->read(event); update_event_times(event); - local_irq_restore(flags); } static u64 perf_event_read(struct perf_event *event) -- cgit v0.10.2 From 3dbebf15c5d3e265f751eec72c1538a00da4be27 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:52 +0100 Subject: perf: Simplify __perf_event_sync_stat Removes constraints from __perf_event_read() by leaving it with a single callsite; this callsite had ctx->lock held, the other one does not. Removes some superfluous code from __perf_event_sync_stat(). Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212508.918544317@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index e66f6c4..af150bb 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1061,8 +1061,6 @@ static int context_equiv(struct perf_event_context *ctx1, && !ctx1->pin_count && !ctx2->pin_count; } -static void __perf_event_read(void *event); - static void __perf_event_sync_stat(struct perf_event *event, struct perf_event *next_event) { @@ -1080,8 +1078,8 @@ static void __perf_event_sync_stat(struct perf_event *event, */ switch (event->state) { case PERF_EVENT_STATE_ACTIVE: - __perf_event_read(event); - break; + event->pmu->read(event); + /* fall-through */ case PERF_EVENT_STATE_INACTIVE: update_event_times(event); -- cgit v0.10.2 From 58e5ad1de3d6ad931c84f0cc8ef0655c922f30ad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:53 +0100 Subject: perf: Simplify __perf_event_read cpuctx is always active, task context is always active for current the previous condition verifies that if its a task context its for current, hence we can assume ctx->is_active. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212509.000272254@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index af150bb..028619d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1526,10 +1526,9 @@ static void __perf_event_read(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - if (ctx->is_active) - update_context_time(ctx); - event->pmu->read(event); + update_context_time(ctx); update_event_times(event); + event->pmu->read(event); } static u64 perf_event_read(struct perf_event *event) -- cgit v0.10.2 From 2b8988c9f7defe319cffe0cd362a7cd356c86f62 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:54 +0100 Subject: perf: Fix time locking Most sites updating ctx->time and event times do so under ctx->lock, make sure they all do. This was made possible by removing the __perf_event_read() call from __perf_event_sync_stat(), which already had this lock taken. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212509.102316434@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 028619d..fdfae88 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1526,8 +1526,11 @@ static void __perf_event_read(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; + spin_lock(&ctx->lock); update_context_time(ctx); update_event_times(event); + spin_unlock(&ctx->lock); + event->pmu->read(event); } @@ -1541,7 +1544,13 @@ static u64 perf_event_read(struct perf_event *event) smp_call_function_single(event->oncpu, __perf_event_read, event, 1); } else if (event->state == PERF_EVENT_STATE_INACTIVE) { + struct perf_event_context *ctx = event->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->lock, flags); + update_context_time(ctx); update_event_times(event); + spin_unlock_irqrestore(&ctx->lock, flags); } return atomic64_read(&event->count); -- cgit v0.10.2 From 59ed446f792cc07d37b1536b9c4664d14e25e425 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:55 +0100 Subject: perf: Fix event scaling for inherited counters Properly account the full hierarchy of counters for both the count (we already did so) and the scale times (new). Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212509.153379276@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index a430ac3..36fe89f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -782,7 +782,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, pid_t pid, perf_callback_t callback); -extern u64 perf_event_read_value(struct perf_event *event); +extern u64 perf_event_read_value(struct perf_event *event, + u64 *enabled, u64 *running); struct perf_sample_data { u64 type; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fdfae88..80f40da 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1774,14 +1774,25 @@ static int perf_event_read_size(struct perf_event *event) return size; } -u64 perf_event_read_value(struct perf_event *event) +u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; u64 total = 0; + *enabled = 0; + *running = 0; + total += perf_event_read(event); - list_for_each_entry(child, &event->child_list, child_list) + *enabled += event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); + *running += event->total_time_running + + atomic64_read(&event->child_total_time_running); + + list_for_each_entry(child, &event->child_list, child_list) { total += perf_event_read(child); + *enabled += child->total_time_enabled; + *running += child->total_time_running; + } return total; } @@ -1793,19 +1804,15 @@ static int perf_event_read_group(struct perf_event *event, struct perf_event *leader = event->group_leader, *sub; int n = 0, size = 0, ret = 0; u64 values[5]; - u64 count; + u64 count, enabled, running; - count = perf_event_read_value(leader); + count = perf_event_read_value(leader, &enabled, &running); values[n++] = 1 + leader->nr_siblings; - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = leader->total_time_enabled + - atomic64_read(&leader->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = leader->total_time_running + - atomic64_read(&leader->child_total_time_running); - } + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = enabled; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = running; values[n++] = count; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(leader); @@ -1820,7 +1827,7 @@ static int perf_event_read_group(struct perf_event *event, list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; - values[n++] = perf_event_read_value(sub); + values[n++] = perf_event_read_value(sub, &enabled, &running); if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(sub); @@ -1838,18 +1845,15 @@ static int perf_event_read_group(struct perf_event *event, static int perf_event_read_one(struct perf_event *event, u64 read_format, char __user *buf) { + u64 enabled, running; u64 values[4]; int n = 0; - values[n++] = perf_event_read_value(event); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - values[n++] = event->total_time_enabled + - atomic64_read(&event->child_total_time_enabled); - } - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - values[n++] = event->total_time_running + - atomic64_read(&event->child_total_time_running); - } + values[n++] = perf_event_read_value(event, &enabled, &running); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = enabled; + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = running; if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); -- cgit v0.10.2 From 6f10581aeaa5543a3b7a8c7a87a064375ec357f8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Nov 2009 22:19:56 +0100 Subject: perf: Fix locking for PERF_FORMAT_GROUP We should hold event->child_mutex when iterating the inherited counters, we should hold ctx->mutex when iterating siblings. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212509.251030114@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 80f40da..3ede098 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1782,6 +1782,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) *enabled = 0; *running = 0; + mutex_lock(&event->child_mutex); total += perf_event_read(event); *enabled += event->total_time_enabled + atomic64_read(&event->child_total_time_enabled); @@ -1793,6 +1794,7 @@ u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) *enabled += child->total_time_enabled; *running += child->total_time_running; } + mutex_unlock(&event->child_mutex); return total; } @@ -1802,10 +1804,12 @@ static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *sub; - int n = 0, size = 0, ret = 0; + int n = 0, size = 0, ret = -EFAULT; + struct perf_event_context *ctx = leader->ctx; u64 values[5]; u64 count, enabled, running; + mutex_lock(&ctx->mutex); count = perf_event_read_value(leader, &enabled, &running); values[n++] = 1 + leader->nr_siblings; @@ -1820,9 +1824,9 @@ static int perf_event_read_group(struct perf_event *event, size = n * sizeof(u64); if (copy_to_user(buf, values, size)) - return -EFAULT; + goto unlock; - ret += size; + ret = size; list_for_each_entry(sub, &leader->sibling_list, group_entry) { n = 0; @@ -1833,11 +1837,15 @@ static int perf_event_read_group(struct perf_event *event, size = n * sizeof(u64); - if (copy_to_user(buf + size, values, size)) - return -EFAULT; + if (copy_to_user(buf + size, values, size)) { + ret = -EFAULT; + goto unlock; + } ret += size; } +unlock: + mutex_unlock(&ctx->mutex); return ret; } @@ -1884,12 +1892,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) return -ENOSPC; WARN_ON_ONCE(event->ctx->parent_ctx); - mutex_lock(&event->child_mutex); if (read_format & PERF_FORMAT_GROUP) ret = perf_event_read_group(event, read_format, buf); else ret = perf_event_read_one(event, read_format, buf); - mutex_unlock(&event->child_mutex); return ret; } -- cgit v0.10.2 From 8904b18046c2f050107f6449e887e7c1142b9ab9 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 20 Nov 2009 22:19:57 +0100 Subject: perf_events: Fix default watermark calculation This patch fixes the default watermark value for the sampling buffer. With the existing calculation (watermark = max(PAGE_SIZE, max_size / 2)), no notification was ever received when the buffer was exactly 1 page. This was because you would never cross the threshold (there is no partial samples). In certain configuration, there was no possibilty detecting the problem because there was not enough space left to store the LOST record.In fact, there may be a more generic problem here. The kernel should ensure that there is alaways enough space to store one LOST record. This patch sets the default watermark to half the buffer size. With such limit, we are guaranteed to get a notification even with a single page buffer assuming no sample is bigger than a page. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20091120212509.344964101@chello.nl> Signed-off-by: Ingo Molnar LKML-Reference: <1256302576-6169-1-git-send-email-eranian@gmail.com> diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3ede098..718fa93 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2340,7 +2340,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) } if (!data->watermark) - data->watermark = max_t(long, PAGE_SIZE, max_size / 2); + data->watermark = max_size / 2; rcu_assign_pointer(event->data, data); -- cgit v0.10.2 From 90c83218c32d7c474da810cd3c9973a43ecbcb9b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Nov 2009 14:31:24 -0200 Subject: perf symbols: Fixup kernel_maps__fixup_end end map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We better call this routine after both the kernel and modules are loaded, because as it was if there weren't modules it would not be called, resulting in kernel_map->end remaining at zero, so no map would be found and consequently the kernel symtab wouldn't get loaded, i.e. no kernel symbols would be resolved. Also this fixes another case, that is when we _have_ modules, but the last map would have its ->end address not set before we loaded its symbols, which would never happen because ->end was not set. Reported-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258821086-11521-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 48f87f0..e161a51 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -71,6 +71,12 @@ static void kernel_maps__fixup_end(void) curr = rb_entry(nd, struct map, rb_node); prev->end = curr->start - 1; } + + /* + * We still haven't the actual symbols, so guess the + * last map final address. + */ + curr->end = ~0UL; } static struct symbol *symbol__new(u64 start, u64 len, const char *name) @@ -1319,12 +1325,6 @@ static int kernel_maps__create_module_maps(void) free(line); fclose(file); - /* - * Now that we have all sorted out, just set the ->end of all - * maps: - */ - kernel_maps__fixup_end(); - return dsos__set_modules_path(); out_delete_line: @@ -1493,7 +1493,10 @@ int kernel_maps__init(bool use_modules) if (use_modules && kernel_maps__create_module_maps() < 0) pr_warning("Failed to load list of modules in use, " "continuing...\n"); - + /* + * Now that we have all the maps created, just set the ->end of them: + */ + kernel_maps__fixup_end(); return 0; } -- cgit v0.10.2 From c12e15e71d4b32da045e798ffd21cbb6197d1c65 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Nov 2009 14:31:25 -0200 Subject: perf symbols: Old versions of elf.h don't have NT_GNU_BUILD_ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258821086-11521-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e161a51..86ec6c7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -12,6 +12,10 @@ #include #include +#ifndef NT_GNU_BUILD_ID +#define NT_GNU_BUILD_ID 3 +#endif + enum dso_origin { DSO__ORIG_KERNEL = 0, DSO__ORIG_JAVA_JIT, -- cgit v0.10.2 From e25613683bd5c46d3e8c8ae6416dccc9f357dcdc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 21 Nov 2009 14:31:26 -0200 Subject: perf trace: Read_tracing_data should die() another day MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It better propagate errors, also if we do a simple: [root@doppio linux-2.6-tip]# perf record -R -a -f sleep 3s ; perf trace [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.182 MB perf.data (~7972 samples) ] Fatal: not an trace data file [root@doppio linux-2.6-tip]# That is what is expected, right? I.e. as we didn't specify any tracepoint event via -e, it should gracefully bail out and not SEGFAULT. Signed-off-by: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258821086-11521-3-git-send-email-acme@infradead.org> [ Fixed the error messages some more ] Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index 831052d..cace355 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -33,11 +33,11 @@ #include #include #include +#include #include "../perf.h" #include "trace-event.h" - #define VERSION "0.5" #define _STR(x) #x @@ -483,23 +483,31 @@ static struct tracepoint_path * get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) { struct tracepoint_path path, *ppath = &path; - int i; + int i, nr_tracepoints = 0; for (i = 0; i < nb_events; i++) { if (pattrs[i].type != PERF_TYPE_TRACEPOINT) continue; + ++nr_tracepoints; ppath->next = tracepoint_id_to_path(pattrs[i].config); if (!ppath->next) die("%s\n", "No memory to alloc tracepoints list"); ppath = ppath->next; } - return path.next; + return nr_tracepoints > 0 ? path.next : NULL; } -void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) + +int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) { char buf[BUFSIZ]; - struct tracepoint_path *tps; + struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events); + + /* + * What? No tracepoints? No sense writing anything here, bail out. + */ + if (tps == NULL) + return -1; output_fd = fd; @@ -528,11 +536,11 @@ void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) page_size = getpagesize(); write_or_die(&page_size, 4); - tps = get_tracepoints_path(pattrs, nb_events); - read_header_files(); read_ftrace_files(tps); read_event_files(tps); read_proc_kallsyms(); read_ftrace_printk(); + + return 0; } diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 44292e0..342dfdd 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -471,11 +471,11 @@ void trace_report(int fd) read_or_die(buf, 3); if (memcmp(buf, test, 3) != 0) - die("not an trace data file"); + die("no trace data in the file"); read_or_die(buf, 7); if (memcmp(buf, "tracing", 7) != 0) - die("not a trace file (missing tracing)"); + die("not a trace file (missing 'tracing' tag)"); version = read_string(); if (show_version) diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index f6637c2..dd51c687 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -248,7 +248,7 @@ unsigned long long raw_field_value(struct event *event, const char *name, void *data); void *raw_field_ptr(struct event *event, const char *name, void *data); -void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); +int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); /* taken from kernel/trace/trace.h */ enum trace_flag_type { -- cgit v0.10.2 From ce71b9df8893ec954e56c5979df6da274f20f65e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:26:55 +0100 Subject: tracing: Use the perf recursion protection from trace event When we commit a trace to perf, we first check if we are recursing in the same buffer so that we don't mess-up the buffer with a recursing trace. But later on, we do the same check from perf to avoid commit recursion. The recursion check is desired early before we touch the buffer but we want to do this check only once. Then export the recursion protection from perf and use it from the trace events before submitting a trace. v2: Put appropriate Reported-by tag Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 43360c1..47bbdf9 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -137,13 +137,8 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -struct perf_trace_buf { - char buf[FTRACE_MAX_PROFILE_SIZE]; - int recursion; -}; - -extern struct perf_trace_buf *perf_trace_buf; -extern struct perf_trace_buf *perf_trace_buf_nmi; +extern char *perf_trace_buf; +extern char *perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 36fe89f..74e98b1 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern int perf_swevent_get_recursion_context(int **recursion); +extern void perf_swevent_put_recursion_context(int *recursion); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -902,6 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } +static int perf_swevent_get_recursion_context(int **recursion) { return -1; } +static void perf_swevent_put_recursion_context(int *recursion) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4945d1c9..c222ef5 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,16 +724,19 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + extern int perf_swevent_get_recursion_context(int **recursion); \ + extern void perf_swevent_put_recursion_context(int *recursion); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ - struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ + char *trace_buf; \ char *raw_data; \ + int *recursion; \ int __cpu; \ int pc; \ \ @@ -749,6 +752,10 @@ static void ftrace_profile_##call(proto) \ return; \ \ local_irq_save(irq_flags); \ + \ + if (perf_swevent_get_recursion_context(&recursion)) \ + goto end_recursion; \ + \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ @@ -759,13 +766,7 @@ static void ftrace_profile_##call(proto) \ if (!trace_buf) \ goto end; \ \ - trace_buf = per_cpu_ptr(trace_buf, __cpu); \ - if (trace_buf->recursion++) \ - goto end_recursion; \ - \ - barrier(); \ - \ - raw_data = trace_buf->buf; \ + raw_data = per_cpu_ptr(trace_buf, __cpu); \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -780,9 +781,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end_recursion: \ - trace_buf->recursion--; \ -end: \ +end: \ + perf_swevent_put_recursion_context(recursion); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 718fa93..aba8227 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3880,34 +3880,42 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, } } -static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) +/* + * Must be called with preemption disabled + */ +int perf_swevent_get_recursion_context(int **recursion) { + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + if (in_nmi()) - return &cpuctx->recursion[3]; + *recursion = &cpuctx->recursion[3]; + else if (in_irq()) + *recursion = &cpuctx->recursion[2]; + else if (in_softirq()) + *recursion = &cpuctx->recursion[1]; + else + *recursion = &cpuctx->recursion[0]; - if (in_irq()) - return &cpuctx->recursion[2]; + if (**recursion) + return -1; - if (in_softirq()) - return &cpuctx->recursion[1]; + (**recursion)++; - return &cpuctx->recursion[0]; + return 0; } -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) +void perf_swevent_put_recursion_context(int *recursion) { - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swevent_recursion_context(cpuctx); - struct perf_event_context *ctx; - - if (*recursion) - goto out; + (*recursion)--; +} - (*recursion)++; - barrier(); +static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct perf_event_context *ctx; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, @@ -3920,12 +3928,25 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, if (ctx) perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); rcu_read_unlock(); +} - barrier(); - (*recursion)--; +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + int *recursion; + + preempt_disable(); + + if (perf_swevent_get_recursion_context(&recursion)) + goto out; + + __do_perf_sw_event(type, event_id, nr, nmi, data, regs); + perf_swevent_put_recursion_context(recursion); out: - put_cpu_var(perf_cpu_context); + preempt_enable(); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4159,7 +4180,8 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, if (!regs) regs = task_pt_regs(current); - do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + /* Trace events already protected against recursion */ + __do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index e0d351b..d9c60f8 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -9,31 +9,33 @@ #include "trace.h" -struct perf_trace_buf *perf_trace_buf; +char *perf_trace_buf; EXPORT_SYMBOL_GPL(perf_trace_buf); -struct perf_trace_buf *perf_trace_buf_nmi; +char *perf_trace_buf_nmi; EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); +typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; + /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - struct perf_trace_buf *buf; + char *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf = alloc_percpu(struct perf_trace_buf); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf; rcu_assign_pointer(perf_trace_buf, buf); - buf = alloc_percpu(struct perf_trace_buf); + buf = (char *)alloc_percpu(perf_trace_t); if (!buf) goto fail_buf_nmi; @@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - struct perf_trace_buf *buf, *nmi_buf; + char *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3696476..22e6f68 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1208,11 +1208,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; - struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *trace_buf; char *raw_data; + int *recursion; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1227,6 +1228,10 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, * This also protects the rcu read side */ local_irq_save(irq_flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + __cpu = smp_processor_id(); if (in_nmi()) @@ -1237,18 +1242,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, __cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, __cpu); /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1263,9 +1257,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(irq_flags); return 0; @@ -1278,10 +1272,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; - struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; + char *trace_buf; + int *recursion; char *raw_data; pc = preempt_count(); @@ -1297,6 +1292,10 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, * This also protects the rcu read side */ local_irq_save(irq_flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + __cpu = smp_processor_id(); if (in_nmi()) @@ -1307,18 +1306,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, __cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, __cpu); /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1334,9 +1322,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(irq_flags); return 0; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 51213b0..0bb9348 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -477,10 +477,11 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; - struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; + char *trace_buf; char *raw_data; + int *recursion; int syscall_nr; int size; int cpu; @@ -505,6 +506,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + cpu = smp_processor_id(); if (in_nmi()) @@ -515,18 +519,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -539,9 +532,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(flags); } @@ -588,10 +581,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; - struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; + char *trace_buf; char *raw_data; + int *recursion; int size; int cpu; @@ -617,6 +611,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); + + if (perf_swevent_get_recursion_context(&recursion)) + goto end_recursion; + cpu = smp_processor_id(); if (in_nmi()) @@ -627,18 +625,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) if (!trace_buf) goto end; - trace_buf = per_cpu_ptr(trace_buf, cpu); - - if (trace_buf->recursion++) - goto end_recursion; - - /* - * Make recursion update visible before entering perf_tp_event - * so that we protect from perf recursions. - */ - barrier(); - - raw_data = trace_buf->buf; + raw_data = per_cpu_ptr(trace_buf, cpu); /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -652,9 +639,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); -end_recursion: - trace_buf->recursion--; end: + perf_swevent_put_recursion_context(recursion); +end_recursion: local_irq_restore(flags); } -- cgit v0.10.2 From 28889bf9e2db29747d58cd47a92d727f927c3aee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:21:33 +0100 Subject: tracing: Forget about the NMI buffer for syscall events We are never in an NMI context when we commit a syscall trace to perf. So just forget about the nmi buffer there. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Jason Baron LKML-Reference: <1258863695-10464-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 0bb9348..41b6dd9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -511,10 +511,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) cpu = smp_processor_id(); - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); + trace_buf = rcu_dereference(perf_trace_buf); if (!trace_buf) goto end; @@ -617,10 +614,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) cpu = smp_processor_id(); - if (in_nmi()) - trace_buf = rcu_dereference(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference(perf_trace_buf); + trace_buf = rcu_dereference(perf_trace_buf); if (!trace_buf) goto end; -- cgit v0.10.2 From b3a75542d329ce4e1c66b293cefeb4429a2af043 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:21:34 +0100 Subject: hw-breakpoints: Remove x86 specific headers from core file Remove asm/processor.h and asm/debugreg.h as these headers are not used anymore in the hw-breakpoints core file. Signed-off-by: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258863695-10464-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 9ea9414..b6d6fa2 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -40,12 +40,6 @@ #include -#include - -#ifdef CONFIG_X86 -#include -#endif - /* * Constraints data */ -- cgit v0.10.2 From 5093ebad5f2348076fdc3dac7d2358b1ad7f85f7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 Nov 2009 05:21:35 +0100 Subject: hw-breakpoints: Separate the kernel part from breakpoint headers So that we can include this header from userspace tools, like perf tools, to get the breakpoint types and len definitions. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258863695-10464-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 0b98cbf..4659e0c 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -16,6 +16,7 @@ enum { HW_BREAKPOINT_X = 4, }; +#ifdef __KERNEL__ #ifdef CONFIG_HAVE_HW_BREAKPOINT static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) @@ -133,5 +134,6 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ +#endif /* __KERNEL__ */ #endif /* _LINUX_HW_BREAKPOINT_H */ -- cgit v0.10.2 From 12eac0bf0461910ae6dd7f071f156f75461a37cf Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Fri, 20 Nov 2009 12:37:17 +0900 Subject: perf bench: Make the mem/memcpy tests more user-friendly mem-memcpy.c uses perf event system calls to obtain CPU clocks. And it suddenly dies with BUG_ON() when it running on Linux doesn't support perf event. Also fail at calloc() can occur easily when too large length is passed. Fail of calloc() causes sudden death with assert(). These behaviours are not friendly. So I fixed the treating of errors. Signed-off-by: Hitoshi Mitake Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1258688237-3797-1-git-send-email-mitake@dcl.info.waseda.ac.jp> [ v2: improved a few small details ] Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index d4f4f98..5165fd1 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -22,9 +22,10 @@ #define K 1024 -static const char *length_str = "1MB"; -static const char *routine = "default"; -static int use_clock = 0; +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int use_clock = 0; +static int clock_fd; static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", @@ -57,17 +58,19 @@ static const char * const bench_mem_memcpy_usage[] = { NULL }; -static int clock_fd; - static struct perf_event_attr clock_attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES }; static void init_clock(void) { clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); - BUG_ON(clock_fd < 0); + + if (clock_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(clock_fd < 0); } static u64 get_clock(void) @@ -104,7 +107,8 @@ int bench_mem_memcpy(int argc, const char **argv, tv_diff.tv_sec = 0; tv_diff.tv_usec = 0; length = (size_t)perf_atoll((char *)length_str); - if ((long long int)length <= 0) { + + if ((s64)length <= 0) { fprintf(stderr, "Invalid length:%s\n", length_str); return 1; } @@ -124,9 +128,12 @@ int bench_mem_memcpy(int argc, const char **argv, } dst = calloc(length, sizeof(char)); - assert(dst); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); + src = calloc(length, sizeof(char)); - assert(src); + if (!src) + die("memory allocation failed - maybe length is too large?\n"); if (bench_format == BENCH_FORMAT_DEFAULT) { printf("# Copying %s Bytes from %p to %p ...\n\n", @@ -136,8 +143,9 @@ int bench_mem_memcpy(int argc, const char **argv, if (use_clock) { init_clock(); clock_start = get_clock(); - } else + } else { BUG_ON(gettimeofday(&tv_start, NULL)); + } routines[i].fn(dst, src, length); @@ -176,9 +184,8 @@ int bench_mem_memcpy(int argc, const char **argv, printf("%lf\n", bps); break; default: - /* reaching here is something disaster */ - fprintf(stderr, "Unknown format:%d\n", bench_format); - exit(1); + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); break; } -- cgit v0.10.2 From 96b02d78a7e47cd189f6b307c5513fec6b2155dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1rton=20N=C3=A9meth?= Date: Sat, 21 Nov 2009 23:10:15 +0100 Subject: perf_event: Remove redundant zero fill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer is first zeroed out by memset(). Then strncpy() is used to fill the content. The strncpy() function also pads the string till the end of the specified length, which is redundant. The strncpy() does not ensures that the string will be properly closed with 0. Use strlcpy() instead. The semantic match that finds this kind of pattern is as follows: (http://coccinelle.lip6.fr/) // @@ expression buffer; expression size; expression str; @@ memset(buffer, 0, size); ... - strncpy( + strlcpy( buffer, str, sizeof(buffer) ); @@ expression buffer; expression size; expression str; @@ memset(&buffer, 0, size); ... - strncpy( + strlcpy( &buffer, str, sizeof(buffer)); @@ expression buffer; identifier field; expression size; expression str; @@ memset(buffer, 0, size); ... - strncpy( + strlcpy( buffer->field, str, sizeof(buffer->field) ); @@ expression buffer; identifier field; expression size; expression str; @@ memset(&buffer, 0, size); ... - strncpy( + strlcpy( buffer.field, str, sizeof(buffer.field)); // On strncpy() vs strlcpy() see http://www.gratisoft.us/todd/papers/strlcpy.html . Signed-off-by: Márton Németh Cc: Julia Lawall Cc: cocci@diku.dk Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4B086547.5040100@freemail.hu> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index aba8227..b26cb03 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3391,7 +3391,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) char comm[TASK_COMM_LEN]; memset(comm, 0, sizeof(comm)); - strncpy(comm, comm_event->task->comm, sizeof(comm)); + strlcpy(comm, comm_event->task->comm, sizeof(comm)); size = ALIGN(strlen(comm)+1, sizeof(u64)); comm_event->comm = comm; -- cgit v0.10.2 From f3ced7cdb24e7968a353d828955fa2daf4167e72 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sun, 22 Nov 2009 11:58:00 +0200 Subject: perf kmem: Add --sort hit and --sort frag This patch adds support for "--sort hit" and "--sort frag" to the "perf kmem" tool. The former was already mentioned in the help text and the latter is useful for finding call-sites that exhibit worst case behavior for SLAB allocators. Signed-off-by: Pekka Enberg Cc: Li Zefan Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Eduard - Gabriel Munteanu Cc: linux-mm@kvack.org LKML-Reference: <1258883880-7149-1-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index f315b05..4145049 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -443,6 +443,15 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->hit < r->hit) + return -1; + else if (l->hit > r->hit) + return 1; + return 0; +} + static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->bytes_alloc < r->bytes_alloc) @@ -452,6 +461,20 @@ static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + double x, y; + + x = fragmentation(l->bytes_req, l->bytes_alloc); + y = fragmentation(r->bytes_req, r->bytes_alloc); + + if (x < y) + return -1; + else if (x > y) + return 1; + return 0; +} + static int parse_sort_opt(const struct option *opt __used, const char *arg, int unset __used) { @@ -464,8 +487,12 @@ static int parse_sort_opt(const struct option *opt __used, sort_fn = ptr_cmp; else if (strcmp(arg, "call_site") == 0) sort_fn = callsite_cmp; + else if (strcmp(arg, "hit") == 0) + sort_fn = hit_cmp; else if (strcmp(arg, "bytes") == 0) sort_fn = bytes_cmp; + else if (strcmp(arg, "frag") == 0) + sort_fn = frag_cmp; else return -1; @@ -517,7 +544,7 @@ static const struct option kmem_options[] = { "stat selector, Pass 'alloc' or 'caller'.", parse_stat_opt), OPT_CALLBACK('s', "sort", NULL, "key", - "sort by key: ptr, call_site, hit, bytes", + "sort by key: ptr, call_site, hit, bytes, frag", parse_sort_opt), OPT_CALLBACK('l', "line", NULL, "num", "show n lins", -- cgit v0.10.2 From e57cfcdac6badd846a1cd831de54a1359c2d1eea Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Sun, 22 Nov 2009 12:29:44 +0200 Subject: perf symbols: Fix ELF header errors during "perf kmem record" The write_event() function in builtin-record.c writes out all mmap()'d DSOs including non-ELF files like GNOME resource files and such. Therefore, check for ELF_K_ELF in filename__read_build_id() before attempting to read the ELF header with gelf_getehdr(). Fixes the following error messages when running "perf kmem record": penberg@penberg-laptop:~/src/linux/tools/perf$ perf kmem record ^C[ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.753 MB perf.data (~32885 samples) ] filename__read_build_id: cannot get elf header. filename__read_build_id: cannot get elf header. filename__read_build_id: cannot get elf header. filename__read_build_id: cannot get elf header. filename__read_build_id: cannot get elf header. filename__read_build_id: cannot get elf header. filename__read_build_id: cannot get elf header. filename__read_build_id: cannot get elf header. filename__read_build_id: cannot get elf header. Signed-off-by: Pekka Enberg Cc: Arnaldo Carvalho de Melo Cc: Li Zefan Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Steven Rostedt LKML-Reference: <1258885784-11709-1-git-send-email-penberg@cs.helsinki.fi> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 86ec6c7..f56158f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -921,6 +921,7 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) GElf_Shdr shdr; Elf_Data *data; Elf_Scn *sec; + Elf_Kind ek; void *ptr; Elf *elf; @@ -937,6 +938,10 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) goto out_close; } + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out_elf_end; + if (gelf_getehdr(elf, &ehdr) == NULL) { pr_err("%s: cannot get elf header.\n", __func__); goto out_elf_end; -- cgit v0.10.2 From 645e8cc0c9f01f07f384fd522b782e5e6ae9de18 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 22 Nov 2009 12:20:19 +0100 Subject: perf_events: Fix modular build Fix: ERROR: "perf_swevent_put_recursion_context" [fs/ext4/ext4.ko] undefined! ERROR: "perf_swevent_get_recursion_context" [fs/ext4/ext4.ko] undefined! Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Masami Hiramatsu Cc: Jason Baron LKML-Reference: <1258864015-10579-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index b26cb03..abe1ef4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3903,11 +3903,13 @@ int perf_swevent_get_recursion_context(int **recursion) return 0; } +EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void perf_swevent_put_recursion_context(int *recursion) { (*recursion)--; } +EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, u64 nr, int nmi, -- cgit v0.10.2 From 78a14e273d93dfbea9673f9b10398c538096302d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 21 Nov 2009 12:50:34 +0100 Subject: drivers/pcmcia: remove unnecessary kzalloc The result of calling kzalloc is never used or freed. The semantic match that finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @r exists@ local idexpression x; statement S; expression E; identifier f,f1,l; position p1,p2; expression *ptr != NULL; @@ x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...x...+> } ( x->f1 = E | (x->f1 == NULL || ...) | f(...,x->f1,...) ) ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c index deb5036..de6bc33 100644 --- a/drivers/pcmcia/sa1111_generic.c +++ b/drivers/pcmcia/sa1111_generic.c @@ -127,10 +127,6 @@ int sa1111_pcmcia_add(struct sa1111_dev *dev, struct pcmcia_low_level *ops, ops->socket_state = sa1111_pcmcia_socket_state; ops->socket_suspend = sa1111_pcmcia_socket_suspend; - s = kzalloc(sizeof(*s) * ops->nr, GFP_KERNEL); - if (!s) - return -ENODEV; - for (i = 0; i < ops->nr; i++) { s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) -- cgit v0.10.2 From 7baed9af4bf0d7850045e36d19a43a2c76872b62 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 22 Nov 2009 13:27:27 +0200 Subject: perf tools: Add V=2 option to help debug config issues Make standard error show up on console when V=2 is set. Signed-off-by: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091122112726.GC13644@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d7198c5..31da6be 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -2,6 +2,7 @@ all:: # Define V=1 to have a more verbose compile. +# Define V=2 to have an even more verbose compile. # # Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() # or vsnprintf() return -1 instead of number of characters which would @@ -263,7 +264,7 @@ PTHREAD_LIBS = -lpthread # explicitly what architecture to check for. Fix this up for yours.. SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ -ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null >/dev/null 2>&1 && echo y"), y) +ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null "$(QUIET_STDERR)" && echo y"), y) CFLAGS := $(CFLAGS) -fstack-protector-all endif @@ -448,6 +449,11 @@ BUILTIN_OBJS += builtin-kmem.o PERFLIBS = $(LIB_FILE) +ifeq ($(V), 2) + QUIET_STDERR = ">/dev/null" +else + QUIET_STDERR = ">/dev/null 2>&1" +endif # # Platform specific tweaks # @@ -475,19 +481,19 @@ ifeq ($(uname_S),Darwin) PTHREAD_LIBS = endif -ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) -ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) +ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) +ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); endif - ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) + ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) BASIC_CFLAGS += -DLIBELF_NO_MMAP endif else msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); endif -ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) +ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231); BASIC_CFLAGS += -DNO_LIBDWARF else @@ -499,20 +505,20 @@ endif ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else - has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd > /dev/null 2>&1 && echo y") + has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y") ifeq ($(has_bfd),y) EXTLIBS += -lbfd else - has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty > /dev/null 2>&1 && echo y") + has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y") ifeq ($(has_bfd_iberty),y) EXTLIBS += -lbfd -liberty else - has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") + has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y") ifeq ($(has_bfd_iberty_z),y) EXTLIBS += -lbfd -liberty -lz else - has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty > /dev/null 2>&1 && echo y") + has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y") ifeq ($(has_cplus_demangle),y) EXTLIBS += -liberty BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE -- cgit v0.10.2 From b197c7ef7169bd5f11fb9d803b322d0daef7e256 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 22 Nov 2009 15:13:11 +0200 Subject: perf tools: Suggest static libraries as well On error, suggest installing static libraries along with shared libraries. Signed-off-by: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091122131311.GA24318@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 31da6be..fce4c3f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -483,7 +483,7 @@ endif ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) - msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); + msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); endif ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) @@ -523,7 +523,7 @@ else EXTLIBS += -liberty BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) + msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) BASIC_CFLAGS += -DNO_DEMANGLE endif endif -- cgit v0.10.2 From 87f8ea4cd3680ef7f4da4391aed97abb25eae333 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 22 Nov 2009 13:21:41 -0200 Subject: perf symbols: Show messages about module loading only if verbose >= 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Suggested-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258903301-20584-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index f56158f..74b5b8a 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1189,7 +1189,7 @@ static int dsos__set_modules_path_dir(char *dirname) DIR *dir = opendir(dirname); if (!dir) { - pr_err("%s: cannot open %s dir\n", __func__, dirname); + pr_debug("%s: cannot open %s dir\n", __func__, dirname); return -1; } @@ -1500,8 +1500,8 @@ int kernel_maps__init(bool use_modules) return -1; if (use_modules && kernel_maps__create_module_maps() < 0) - pr_warning("Failed to load list of modules in use, " - "continuing...\n"); + pr_debug("Failed to load list of modules in use, " + "continuing...\n"); /* * Now that we have all the maps created, just set the ->end of them: */ -- cgit v0.10.2 From 50e5095afa8c2be0f35e5c0e21d5f7912340e8f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 22 Nov 2009 14:59:22 -0200 Subject: perf report: Do map lookups in resolve_callchain() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug introduced in 439d473b4777de510e1322168ac6f2f377ecd5bc, making the initial map be used for all IPs, so that symbols outside this initial map would either be erroneously resolved or not resolve at all. Reported-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1258909162-28496-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0d39e80..7e690f7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -467,7 +467,7 @@ static int call__match(struct symbol *sym) return 0; } -static struct symbol **resolve_callchain(struct thread *thread, struct map *map, +static struct symbol **resolve_callchain(struct thread *thread, struct ip_callchain *chain, struct symbol **parent) { @@ -496,10 +496,10 @@ static struct symbol **resolve_callchain(struct thread *thread, struct map *map, case PERF_CONTEXT_HV: break; case PERF_CONTEXT_KERNEL: - sym = kernel_maps__find_symbol(ip, &map, NULL); + sym = kernel_maps__find_symbol(ip, NULL, NULL); break; default: - sym = resolve_symbol(thread, &map, &ip); + sym = resolve_symbol(thread, NULL, &ip); break; } @@ -529,7 +529,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct hist_entry *he; if ((sort__has_parent || callchain) && chain) - syms = resolve_callchain(thread, map, chain, &parent); + syms = resolve_callchain(thread, chain, &parent); he = __hist_entry__add(thread, map, sym, parent, ip, count, level, &hit); -- cgit v0.10.2 From b668c9cf3e58739dac54a1d6f42f2b4bdd980b3e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 22 Nov 2009 08:53:48 -0800 Subject: rcu: Fix grace-period-stall bug on large systems with CPU hotplug When the last CPU of a given leaf rcu_node structure goes offline, all of the tasks queued on that leaf rcu_node structure (due to having blocked in their current RCU read-side critical sections) are requeued onto the root rcu_node structure. This requeuing is carried out by rcu_preempt_offline_tasks(). However, it is possible that these queued tasks are the only thing preventing the leaf rcu_node structure from reporting a quiescent state up the rcu_node hierarchy. Unfortunately, the old code would fail to do this reporting, resulting in a grace-period stall given the following sequence of events: 1. Kernel built for more than 32 CPUs on 32-bit systems or for more than 64 CPUs on 64-bit systems, so that there is more than one rcu_node structure. (Or CONFIG_RCU_FANOUT is artificially set to a number smaller than CONFIG_NR_CPUS.) 2. The kernel is built with CONFIG_TREE_PREEMPT_RCU. 3. A task running on a CPU associated with a given leaf rcu_node structure blocks while in an RCU read-side critical section -and- that CPU has not yet passed through a quiescent state for the current RCU grace period. This will cause the task to be queued on the leaf rcu_node's blocked_tasks[] array, in particular, on the element of this array corresponding to the current grace period. 4. Each of the remaining CPUs corresponding to this same leaf rcu_node structure pass through a quiescent state. However, the task is still in its RCU read-side critical section, so these quiescent states cannot be reported further up the rcu_node hierarchy. Nevertheless, all bits in the leaf rcu_node structure's ->qsmask field are now zero. 5. Each of the remaining CPUs go offline. (The events in step #4 and #5 can happen in any order as long as each CPU passes through a quiescent state before going offline.) 6. When the last CPU goes offline, __rcu_offline_cpu() will invoke rcu_preempt_offline_tasks(), which will move the task to the root rcu_node structure, but without reporting a quiescent state up the rcu_node hierarchy (and this failure to report a quiescent state is the bug). But because this leaf rcu_node structure's ->qsmask field is already zero and its ->block_tasks[] entries are all empty, force_quiescent_state() will skip this rcu_node structure. Therefore, grace periods are now hung. This patch abstracts some code out of rcu_read_unlock_special(), calling the result task_quiet() by analogy with cpu_quiet(), and invokes task_quiet() from both rcu_read_lock_special() and __rcu_offline_cpu(). Invoking task_quiet() from __rcu_offline_cpu() reports the quiescent state up the rcu_node hierarchy, fixing the bug. This ends up requiring a separate lock_class_key per level of the rcu_node hierarchy, which this patch also provides. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12589088301770-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 9b36d6d..b79bfcd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -51,7 +51,7 @@ /* Data structures. */ -static struct lock_class_key rcu_root_class; +static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; #define RCU_STATE_INITIALIZER(name) { \ .level = { &name.node[0] }, \ @@ -936,6 +936,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) { unsigned long flags; unsigned long mask; + int need_quiet = 0; struct rcu_data *rdp = rsp->rda[cpu]; struct rcu_node *rnp; @@ -949,29 +950,30 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit &= ~mask; if (rnp->qsmaskinit != 0) { - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + if (rnp != rdp->mynode) + spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } - - /* - * If there was a task blocking the current grace period, - * and if all CPUs have checked in, we need to propagate - * the quiescent state up the rcu_node hierarchy. But that - * is inconvenient at the moment due to deadlock issues if - * this should end the current grace period. So set the - * offlined CPU's bit in ->qsmask in order to force the - * next force_quiescent_state() invocation to clean up this - * mess in a deadlock-free manner. - */ - if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask) - rnp->qsmask |= mask; - + if (rnp == rdp->mynode) + need_quiet = rcu_preempt_offline_tasks(rsp, rnp, rdp); + else + spin_unlock(&rnp->lock); /* irqs remain disabled. */ mask = rnp->grpmask; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ rnp = rnp->parent; } while (rnp != NULL); - spin_unlock_irqrestore(&rsp->onofflock, flags); + /* + * We still hold the leaf rcu_node structure lock here, and + * irqs are still disabled. The reason for this subterfuge is + * because invoking task_quiet() with ->onofflock held leads + * to deadlock. + */ + spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + rnp = rdp->mynode; + if (need_quiet) + task_quiet(rnp, flags); + else + spin_unlock_irqrestore(&rnp->lock, flags); rcu_adopt_orphan_cbs(rsp); } @@ -1731,6 +1733,7 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { spin_lock_init(&rnp->lock); + lockdep_set_class(&rnp->lock, &rcu_node_class[i]); rnp->gpnum = 0; rnp->qsmask = 0; rnp->qsmaskinit = 0; @@ -1753,7 +1756,6 @@ static void __init rcu_init_one(struct rcu_state *rsp) INIT_LIST_HEAD(&rnp->blocked_tasks[1]); } } - lockdep_set_class(&rcu_get_root(rsp)->lock, &rcu_root_class); } /* diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 17a28a0..a81188c 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -305,6 +305,9 @@ static void rcu_bootup_announce(void); long rcu_batches_completed(void); static void rcu_preempt_note_context_switch(int cpu); static int rcu_preempted_readers(struct rcu_node *rnp); +#ifdef CONFIG_HOTPLUG_CPU +static void task_quiet(struct rcu_node *rnp, unsigned long flags); +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_RCU_CPU_STALL_DETECTOR static void rcu_print_task_stall(struct rcu_node *rnp); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 5ca2d26..0bdb592 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -160,11 +160,51 @@ static int rcu_preempted_readers(struct rcu_node *rnp) return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); } +/* + * Record a quiescent state for all tasks that were previously queued + * on the specified rcu_node structure and that were blocking the current + * RCU grace period. The caller must hold the specified rnp->lock with + * irqs disabled, and this lock is released upon return, but irqs remain + * disabled. + */ +static void task_quiet(struct rcu_node *rnp, unsigned long flags) + __releases(rnp->lock) +{ + unsigned long mask; + struct rcu_node *rnp_p; + + if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; /* Still need more quiescent states! */ + } + + rnp_p = rnp->parent; + if (rnp_p == NULL) { + /* + * Either there is only one rcu_node in the tree, + * or tasks were kicked up to root rcu_node due to + * CPUs going offline. + */ + cpu_quiet_msk_finish(&rcu_preempt_state, flags); + return; + } + + /* Report up the rest of the hierarchy. */ + mask = rnp->grpmask; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + spin_lock(&rnp_p->lock); /* irqs already disabled. */ + cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags); +} + +/* + * Handle special cases during rcu_read_unlock(), such as needing to + * notify RCU core processing or task having blocked during the RCU + * read-side critical section. + */ static void rcu_read_unlock_special(struct task_struct *t) { int empty; unsigned long flags; - unsigned long mask; struct rcu_node *rnp; int special; @@ -213,30 +253,15 @@ static void rcu_read_unlock_special(struct task_struct *t) /* * If this was the last task on the current list, and if * we aren't waiting on any CPUs, report the quiescent state. - * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk() - * drop rnp->lock and restore irq. + * Note that task_quiet() releases rnp->lock. */ - if (!empty && rnp->qsmask == 0 && - !rcu_preempted_readers(rnp)) { - struct rcu_node *rnp_p; - - if (rnp->parent == NULL) { - /* Only one rcu_node in the tree. */ - cpu_quiet_msk_finish(&rcu_preempt_state, flags); - return; - } - /* Report up the rest of the hierarchy. */ - mask = rnp->grpmask; + if (empty) spin_unlock_irqrestore(&rnp->lock, flags); - rnp_p = rnp->parent; - spin_lock_irqsave(&rnp_p->lock, flags); - WARN_ON_ONCE(rnp->qsmask); - cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags); - return; - } - spin_unlock(&rnp->lock); + else + task_quiet(rnp, flags); + } else { + local_irq_restore(flags); } - local_irq_restore(flags); } /* @@ -303,6 +328,8 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) * rcu_node. The reason for not just moving them to the immediate * parent is to remove the need for rcu_read_unlock_special() to * make more than two attempts to acquire the target rcu_node's lock. + * Returns true if there were tasks blocking the current RCU grace + * period. * * Returns 1 if there was previously a task blocking the current grace * period on the specified rcu_node structure. @@ -316,7 +343,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, int i; struct list_head *lp; struct list_head *lp_root; - int retval = rcu_preempted_readers(rnp); + int retval; struct rcu_node *rnp_root = rcu_get_root(rsp); struct task_struct *tp; @@ -334,6 +361,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, * rcu_nodes in terms of gp_num value. This fact allows us to * move the blocked_tasks[] array directly, element by element. */ + retval = rcu_preempted_readers(rnp); for (i = 0; i < 2; i++) { lp = &rnp->blocked_tasks[i]; lp_root = &rnp_root->blocked_tasks[i]; @@ -346,7 +374,6 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, spin_unlock(&rnp_root->lock); /* irqs remain disabled */ } } - return retval; } @@ -512,6 +539,16 @@ static int rcu_preempted_readers(struct rcu_node *rnp) return 0; } +#ifdef CONFIG_HOTPLUG_CPU + +/* Because preemptible RCU does not exist, no quieting of tasks. */ +static void task_quiet(struct rcu_node *rnp, unsigned long flags) +{ + spin_unlock_irqrestore(&rnp->lock, flags); +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + #ifdef CONFIG_RCU_CPU_STALL_DETECTOR /* -- cgit v0.10.2 From 9f680ab41485edfdc96331b70afa7513aa0a7720 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 22 Nov 2009 08:53:49 -0800 Subject: rcu: Eliminate unneeded function wrapping The functions rcu_init() is a wrapper for __rcu_init(), and also sets up the CPU-hotplug notifier for rcu_barrier_cpu_hotplug(). But TINY_RCU doesn't need CPU-hotplug notification, and the rcu_barrier_cpu_hotplug() is a simple wrapper for rcu_cpu_notify(). So push rcu_init() out to kernel/rcutree.c and kernel/rcutiny.c and get rid of the wrapper function rcu_barrier_cpu_hotplug(). Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12589088302320-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 2c1fe83..a3b6272 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -38,7 +38,6 @@ void rcu_bh_qs(int cpu); #define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); -extern void __rcu_init(void); /* * Return the number of grace periods. @@ -69,7 +68,6 @@ static inline void synchronize_rcu_bh_expedited(void) } struct notifier_block; -extern int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu); #ifdef CONFIG_NO_HZ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 9642c6b..111a652 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,8 +34,6 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); -extern int rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); extern int rcu_needs_cpu(int cpu); extern int rcu_expedited_torture_stats(char *page); @@ -83,7 +81,6 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } -extern void __rcu_init(void); extern void rcu_check_callbacks(int cpu, int user); extern long rcu_batches_completed(void); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 7625f20..eb6b534 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -161,28 +161,6 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_bh); #endif /* #ifndef CONFIG_TINY_RCU */ -static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - return rcu_cpu_notify(self, action, hcpu); -} - -void __init rcu_init(void) -{ - int i; - - __rcu_init(); - cpu_notifier(rcu_barrier_cpu_hotplug, 0); - - /* - * We don't need protection against CPU-hotplug here because - * this is called early in boot, before either interrupts - * or the scheduler are operational. - */ - for_each_online_cpu(i) - rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i); -} - void rcu_scheduler_starting(void) { WARN_ON(num_online_cpus() != 1); diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index b33ec3aa..9f6d9ff 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -178,15 +178,6 @@ static void rcu_process_callbacks(struct softirq_action *unused) } /* - * Null function to handle CPU being onlined. Longer term, we want to - * make TINY_RCU avoid using rcupdate.c, but later... - */ -int rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -{ - return NOTIFY_OK; -} - -/* * Wait for a grace period to elapse. But it is illegal to invoke * synchronize_sched() from within an RCU read-side critical section. * Therefore, any legal call to synchronize_sched() is a quiescent @@ -285,7 +276,7 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL_GPL(rcu_barrier_sched); -void __rcu_init(void) +void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b79bfcd..e3d3bbd 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1644,8 +1644,8 @@ static void __cpuinit rcu_online_cpu(int cpu) /* * Handle CPU online/offline notification events. */ -int __cpuinit rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int __cpuinit rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { long cpu = (long)hcpu; @@ -1781,8 +1781,10 @@ do { \ } \ } while (0) -void __init __rcu_init(void) +void __init rcu_init(void) { + int i; + rcu_bootup_announce(); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); @@ -1791,6 +1793,15 @@ void __init __rcu_init(void) RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); + + /* + * We don't need protection against CPU-hotplug here because + * this is called early in boot, before either interrupts + * or the scheduler are operational. + */ + cpu_notifier(rcu_cpu_notify, 0); + for_each_online_cpu(i) + rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)i); } #include "rcutree_plugin.h" -- cgit v0.10.2 From 6ebb237bece23275d1da149b61a342f0d4d06a08 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 22 Nov 2009 08:53:50 -0800 Subject: rcu: Re-arrange code to reduce #ifdef pain Remove #ifdefs from kernel/rcupdate.c and include/linux/rcupdate.h by moving code to include/linux/rcutiny.h, include/linux/rcutree.h, and kernel/rcutree.c. Also remove some definitions that are no longer used. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1258908830885-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 2f1bc42..24440f4 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,11 +52,6 @@ struct rcu_head { }; /* Exported common interfaces */ -#ifdef CONFIG_TREE_PREEMPT_RCU -extern void synchronize_rcu(void); -#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -#define synchronize_rcu synchronize_sched -#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ extern void synchronize_rcu_bh(void); extern void synchronize_sched(void); extern void rcu_barrier(void); @@ -67,13 +62,6 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); -extern void rcu_scheduler_starting(void); -#ifndef CONFIG_TINY_RCU -extern int rcu_needs_cpu(int cpu); -#else -static inline int rcu_needs_cpu(int cpu) { return 0; } -#endif -extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index a3b6272..c4ba9a7 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -39,6 +39,11 @@ void rcu_bh_qs(int cpu); #define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); +static inline int rcu_needs_cpu(int cpu) +{ + return 0; +} + /* * Return the number of grace periods. */ @@ -57,6 +62,8 @@ static inline long rcu_batches_completed_bh(void) extern int rcu_expedited_torture_stats(char *page); +#define synchronize_rcu synchronize_sched + static inline void synchronize_rcu_expedited(void) { synchronize_sched(); @@ -86,6 +93,10 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +static inline void rcu_scheduler_starting(void) +{ +} + static inline void exit_rcu(void) { } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 111a652..c93eee5 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,12 +35,14 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_scheduler_starting(void); extern int rcu_expedited_torture_stats(char *page); #ifdef CONFIG_TREE_PREEMPT_RCU extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); +extern void synchronize_rcu(void); extern void exit_rcu(void); #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ @@ -55,7 +57,7 @@ static inline void __rcu_read_unlock(void) preempt_enable(); } -#define __synchronize_sched() synchronize_rcu() +#define synchronize_rcu synchronize_sched static inline void exit_rcu(void) { diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index eb6b534..9b7fd47 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,7 +44,6 @@ #include #include #include -#include #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -53,8 +52,6 @@ struct lockdep_map rcu_lock_map = EXPORT_SYMBOL_GPL(rcu_lock_map); #endif -int rcu_scheduler_active __read_mostly; - /* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. @@ -66,104 +63,3 @@ void wakeme_after_rcu(struct rcu_head *head) rcu = container_of(head, struct rcu_synchronize, head); complete(&rcu->completion); } - -#ifndef CONFIG_TINY_RCU - -#ifdef CONFIG_TREE_PREEMPT_RCU - -/** - * synchronize_rcu - wait until a grace period has elapsed. - * - * Control will return to the caller some time after a full grace - * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - */ -void synchronize_rcu(void) -{ - struct rcu_synchronize rcu; - - if (!rcu_scheduler_active) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_rcu); - -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ - -/** - * synchronize_sched - wait until an rcu-sched grace period has elapsed. - * - * Control will return to the caller some time after a full rcu-sched - * grace period has elapsed, in other words after all currently executing - * rcu-sched read-side critical sections have completed. These read-side - * critical sections are delimited by rcu_read_lock_sched() and - * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), - * local_irq_disable(), and so on may be used in place of - * rcu_read_lock_sched(). - * - * This means that all preempt_disable code sequences, including NMI and - * hardware-interrupt handlers, in progress on entry will have completed - * before this primitive returns. However, this does not guarantee that - * softirq handlers will have completed, since in some kernels, these - * handlers can run in process context, and can block. - * - * This primitive provides the guarantees made by the (now removed) - * synchronize_kernel() API. In contrast, synchronize_rcu() only - * guarantees that rcu_read_lock() sections will have completed. - * In "classic RCU", these two guarantees happen to be one and - * the same, but can differ in realtime RCU implementations. - */ -void synchronize_sched(void) -{ - struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_sched); - -/** - * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. - * - * Control will return to the caller some time after a full rcu_bh grace - * period has elapsed, in other words after all currently executing rcu_bh - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), - * and may be nested. - */ -void synchronize_rcu_bh(void) -{ - struct rcu_synchronize rcu; - - if (rcu_blocking_is_gp()) - return; - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_bh(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(synchronize_rcu_bh); - -#endif /* #ifndef CONFIG_TINY_RCU */ - -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} diff --git a/kernel/rcutree.c b/kernel/rcutree.c index e3d3bbd..4ca7e02 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "rcutree.h" @@ -79,6 +80,8 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +static int rcu_scheduler_active __read_mostly; + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s @@ -1396,6 +1399,68 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); +/** + * synchronize_sched - wait until an rcu-sched grace period has elapsed. + * + * Control will return to the caller some time after a full rcu-sched + * grace period has elapsed, in other words after all currently executing + * rcu-sched read-side critical sections have completed. These read-side + * critical sections are delimited by rcu_read_lock_sched() and + * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(), + * local_irq_disable(), and so on may be used in place of + * rcu_read_lock_sched(). + * + * This means that all preempt_disable code sequences, including NMI and + * hardware-interrupt handlers, in progress on entry will have completed + * before this primitive returns. However, this does not guarantee that + * softirq handlers will have completed, since in some kernels, these + * handlers can run in process context, and can block. + * + * This primitive provides the guarantees made by the (now removed) + * synchronize_kernel() API. In contrast, synchronize_rcu() only + * guarantees that rcu_read_lock() sections will have completed. + * In "classic RCU", these two guarantees happen to be one and + * the same, but can differ in realtime RCU implementations. + */ +void synchronize_sched(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_sched(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_sched); + +/** + * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. + * + * Control will return to the caller some time after a full rcu_bh grace + * period has elapsed, in other words after all currently executing rcu_bh + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), + * and may be nested. + */ +void synchronize_rcu_bh(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + /* * Check to see if there is any immediate RCU-related work to be done * by the current CPU, for the specified type of RCU, returning 1 if so. @@ -1480,6 +1545,21 @@ int rcu_needs_cpu(int cpu) rcu_preempt_needs_cpu(cpu); } +/* + * This function is invoked towards the end of the scheduler's initialization + * process. Before this is called, the idle task might contain + * RCU read-side critical sections (during which time, this idle + * task is booting the system). After this function is called, the + * idle tasks are prohibited from containing RCU read-side critical + * sections. + */ +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 0bdb592..1d295c7 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -425,6 +425,30 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu); +/** + * synchronize_rcu - wait until a grace period has elapsed. + * + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +void synchronize_rcu(void) +{ + struct rcu_synchronize rcu; + + if (!rcu_scheduler_active) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_rcu); + /* * Wait for an rcu-preempt grace period. We are supposed to expedite the * grace period, but this is the crude slow compatability hack, so just -- cgit v0.10.2 From 85c3b529f8ad4d65ba86b982ef050212ae7dd976 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 20 Nov 2009 11:00:12 -0500 Subject: SELinux: header generation may hit infinite loop If a permission name is long enough the selinux class definition generation tool will go into a infinite loop. This is because it's macro max() is fooled into thinking it is dealing with unsigned numbers. This patch makes sure the macro always uses signed number so 1 > -1. Signed-off-by: Eric Paris Signed-off-by: James Morris diff --git a/scripts/selinux/genheaders/genheaders.c b/scripts/selinux/genheaders/genheaders.c index 771b86f..2462696 100644 --- a/scripts/selinux/genheaders/genheaders.c +++ b/scripts/selinux/genheaders/genheaders.c @@ -13,7 +13,7 @@ struct security_class_mapping { #include "classmap.h" #include "initial_sid_to_string.h" -#define max(x, y) ((x > y) ? x : y) +#define max(x, y) (((int)(x) > (int)(y)) ? x : y) const char *progname; -- cgit v0.10.2 From 81516c5fc83a13a1d12f466aa7e14f5fd62a63ce Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 22 Nov 2009 14:13:35 +0200 Subject: perf: Use default compiler mode by default gcc with no flags typically is a sane default for systems to use, and looking at the running kernel is probably broken for cross-builds anyway, so let's not do this. Add EXTRA_CFLAGS so that users can override default gcc mode if they want to. Signed-off-by: Michael S. Tsirkin Acked-by: Arjan van de Ven Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091122121335.GA24254@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index fce4c3f..3ef6621 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -148,6 +148,8 @@ all:: # broken, or spawning external process is slower than built-in grep perf has). # # Define LDFLAGS=-static to build a static binary. +# +# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN @@ -160,22 +162,6 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') -# -# Add -m32 for cross-builds: -# -ifdef NO_64BIT - MBITS := -m32 -else - # - # If we're on a 64-bit kernel (except ia64), use -m64: - # - ifneq ($(uname_M),ia64) - ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) - MBITS := -m64 - endif - endif -endif - # CFLAGS and LDFLAGS are for the users to override from the command line. # @@ -212,7 +198,7 @@ ifndef PERF_DEBUG CFLAGS_OPTIMIZE = -O6 endif -CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) +CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) -- cgit v0.10.2 From 98e4833ba3c314c99dc364012fba6ac894230ad0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Nov 2009 08:03:09 +0100 Subject: ring-buffer benchmark: Run producer/consumer threads at nice +19 The ring-buffer benchmark threads run on nice 0 by default, using up a lot of CPU time and slowing down the system: PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1024 root 20 0 0 0 0 D 95.3 0.0 4:01.67 rb_producer 1023 root 20 0 0 0 0 R 93.5 0.0 2:54.33 rb_consumer 21569 mingo 40 0 14852 1048 772 R 3.6 0.1 0:00.05 top 1 root 40 0 4080 928 668 S 0.0 0.0 0:23.98 init Renice them to +19 to make them less intrusive. Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 70df73e..3875d49 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -399,6 +399,12 @@ static int __init ring_buffer_benchmark_init(void) if (IS_ERR(producer)) goto out_kill; + /* + * Run them as low-prio background tasks by default: + */ + set_user_nice(consumer, 19); + set_user_nice(producer, 19); + return 0; out_kill: -- cgit v0.10.2 From bfd451184d80301d1ae970b1ebffde1e9c6240f9 Mon Sep 17 00:00:00 2001 From: Simon Kaempflein Date: Mon, 16 Nov 2009 15:25:53 +1000 Subject: perf record, x86: Print more intelligent error message when sampling fails Print more accurate error message when "perf record" fails because there is no APIC support, on x86. Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 87f98fd..0e519c6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -307,6 +307,12 @@ try_again: printf("\n"); error("perfcounter syscall returned with %d (%s)\n", fd[nr_cpu][counter], strerror(err)); + +#if defined(__i386__) || defined(__x86_64__) + if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) + die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n"); +#endif + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } -- cgit v0.10.2 From e670761f12f4069d204f433bf547d9c679a4fd05 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Nov 2009 00:23:37 -0800 Subject: x86: apic: Remove not needed #ifdef Suresh made dmar_table_init() already have that protection. Signed-off-by: Yinghai Lu LKML-Reference: <4B07A739.3030104@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4c689f4..ad8c75b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1377,14 +1377,11 @@ void __init enable_IR_x2apic(void) unsigned long flags; struct IO_APIC_route_entry **ioapic_entries = NULL; int ret, x2apic_enabled = 0; - int dmar_table_init_ret = 0; + int dmar_table_init_ret; -#ifdef CONFIG_INTR_REMAP dmar_table_init_ret = dmar_table_init(); - if (dmar_table_init_ret) - pr_debug("dmar_table_init() failed with %d:\n", - dmar_table_init_ret); -#endif + if (dmar_table_init_ret && !x2apic_supported()) + return; ioapic_entries = alloc_ioapic_entries(); if (!ioapic_entries) { -- cgit v0.10.2 From 37ef2a3029fde884808ff1b369677abc7dd9a79a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Nov 2009 00:23:37 -0800 Subject: x86: Re-get cfg_new in case reuse/move irq_desc When irq_desc is moved, we need to make sure to use the right cfg_new. Signed-off-by: Yinghai Lu LKML-Reference: <4B07A739.3030104@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ff23719..085e60e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3186,6 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) continue; desc_new = move_irq_desc(desc_new, node); + cfg_new = desc_new->chip_data; if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; -- cgit v0.10.2 From 163d3866cfa79aa5945f1ee5e43fb3ed1455f75c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Nov 2009 00:23:37 -0800 Subject: x86: apic: Print out SRAT table APIC id in hex Make it consistent with APIC MADT print out, for big systems APIC id in hex is more readable. Signed-off-by: Yinghai Lu LKML-Reference: <4B07A739.3030104@kernel.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index dbb5381..9d7ce96 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -136,7 +136,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", pxm, apic_id, node); } @@ -170,7 +170,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) apicid_to_node[apic_id] = node; node_set(node, cpu_nodes_parsed); acpi_numa = 1; - printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n", + printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", pxm, apic_id, node); } -- cgit v0.10.2 From 6e3d8330ae2c4b2c11a9577a0130d2ecda1c610d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Nov 2009 10:19:20 +0100 Subject: perf events: Do not generate function trace entries in perf code Decreases perf overhead when function tracing is enabled, by about 50%. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e9..1d2cb38 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -5,6 +5,7 @@ # Don't trace early stages of a secondary CPU boot ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif # Make sure load_percpu_segment has no stackprotector diff --git a/kernel/Makefile b/kernel/Makefile index 17b575e..6b7ce81 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,6 +21,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg +CFLAGS_REMOVE_perf_event.o = -pg endif obj-$(CONFIG_FREEZER) += freezer.o -- cgit v0.10.2 From 457dc928f586f3f4b930206965e6db270034e97e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Nov 2009 11:03:28 +0100 Subject: tracing, function tracer: Clean up strstrip() usage Clean up strstrip() usage - which also addresses this build warning: kernel/trace/ftrace.c: In function 'ftrace_pid_write': kernel/trace/ftrace.c:3004: warning: ignoring return value of 'strstrip', declared with attribute warn_unused_result Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7f9b51e..1dc101d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2985,7 +2985,7 @@ static ssize_t ftrace_pid_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - char buf[64]; + char buf[64], *tmp; long val; int ret; @@ -3001,11 +3001,11 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid" * to clean the filter quietly. */ - strstrip(buf); - if (strlen(buf) == 0) + tmp = strstrip(buf); + if (strlen(tmp) == 0) return 1; - ret = strict_strtol(buf, 10, &val); + ret = strict_strtol(tmp, 10, &val); if (ret < 0) return ret; @@ -3391,4 +3391,3 @@ void ftrace_graph_stop(void) ftrace_stop(); } #endif - -- cgit v0.10.2 From 0e7810be30f66e9f430c4ce2cd3b14634211690f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 20 Nov 2009 14:00:14 +0000 Subject: x86: Suppress stack overrun message for init_task init_task doesn't get its stack end location set to STACK_END_MAGIC, and hence the message is confusing rather than helpful in this case. Signed-off-by: Jan Beulich LKML-Reference: <4B06AEFE02000078000211F4@vpn.id2.novell.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f4cee90..071eee6 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -658,7 +658,7 @@ no_context(struct pt_regs *regs, unsigned long error_code, show_fault_oops(regs, error_code, address); stackend = end_of_stack(tsk); - if (*stackend != STACK_END_MAGIC) + if (tsk != &init_task && *stackend != STACK_END_MAGIC) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); tsk->thread.cr2 = address; -- cgit v0.10.2 From a4234bfcf4d72a10a99176cdef007345e9c3b4aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 23 Nov 2009 10:57:59 +0100 Subject: perf_events: Optimize the swcounter hotpath The structure init creates a bit memcpy, which shows up big time in perf annotate output: : ffffffff810a859d <__perf_sw_event>: 1.68 : ffffffff810a859d: 55 push %rbp 1.69 : ffffffff810a859e: 41 89 fa mov %edi,%r10d 0.01 : ffffffff810a85a1: 49 89 c9 mov %rcx,%r9 0.00 : ffffffff810a85a4: 31 c0 xor %eax,%eax 1.71 : ffffffff810a85a6: b9 16 00 00 00 mov $0x16,%ecx 0.00 : ffffffff810a85ab: 48 89 e5 mov %rsp,%rbp 0.00 : ffffffff810a85ae: 48 83 ec 60 sub $0x60,%rsp 1.52 : ffffffff810a85b2: 48 8d 7d a0 lea -0x60(%rbp),%rdi 85.20 : ffffffff810a85b6: f3 ab rep stos %eax,%es:(%rdi) None of the callees depends on the structure being pre-initialized, so only initialize ->addr. This gets rid of the memcpy overhead. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index abe1ef4..20df8ab 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3954,12 +3954,12 @@ out: void __perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - struct perf_sample_data data = { - .addr = addr, - }; + struct perf_sample_data data; - do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, - &data, regs); + data.addr = addr; + data.raw = NULL; + + do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); } static void perf_swevent_read(struct perf_event *event) -- cgit v0.10.2 From a66a3052e2d4c5815d7ad26887b1d4193206e691 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:23 +0100 Subject: perf_events: Undo copy/paste damage We had two almost identical functions, avoid the duplication. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo LKML-Reference: <20091123103819.537537928@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 20df8ab..e2daa10 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1704,16 +1704,10 @@ static void free_event(struct perf_event *event) call_rcu(&event->rcu_head, free_event_rcu); } -/* - * Called when the last reference to the file is gone. - */ -static int perf_release(struct inode *inode, struct file *file) +int perf_event_release_kernel(struct perf_event *event) { - struct perf_event *event = file->private_data; struct perf_event_context *ctx = event->ctx; - file->private_data = NULL; - WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); perf_event_remove_from_context(event); @@ -1728,26 +1722,19 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +EXPORT_SYMBOL_GPL(perf_event_release_kernel); -int perf_event_release_kernel(struct perf_event *event) +/* + * Called when the last reference to the file is gone. + */ +static int perf_release(struct inode *inode, struct file *file) { - struct perf_event_context *ctx = event->ctx; - - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - perf_event_remove_from_context(event); - mutex_unlock(&ctx->mutex); - - mutex_lock(&event->owner->perf_event_mutex); - list_del_init(&event->owner_entry); - mutex_unlock(&event->owner->perf_event_mutex); - put_task_struct(event->owner); + struct perf_event *event = file->private_data; - free_event(event); + file->private_data = NULL; - return 0; + return perf_event_release_kernel(event); } -EXPORT_SYMBOL_GPL(perf_event_release_kernel); static int perf_event_read_size(struct perf_event *event) { -- cgit v0.10.2 From 6c2bfcbe58e0dd39554be88940149f5aa11e17d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:24 +0100 Subject: perf_events: Fix style nits Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20091123103819.613427378@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index e2daa10..1f14481 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -447,9 +447,8 @@ retry: * can remove the event safely, if the call above did not * succeed. */ - if (!list_empty(&event->group_entry)) { + if (!list_empty(&event->group_entry)) list_del_event(event, ctx); - } spin_unlock_irq(&ctx->lock); } @@ -1033,10 +1032,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx, update_context_time(ctx); perf_disable(); - if (ctx->nr_active) + if (ctx->nr_active) { list_for_each_entry(event, &ctx->group_list, group_entry) group_sched_out(event, cpuctx, ctx); - + } perf_enable(); out: spin_unlock(&ctx->lock); -- cgit v0.10.2 From 2e2af50b1fab3c40636839a7f439c167ae559533 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:25 +0100 Subject: perf_events: Disable events when we detach them If we leave the event in STATE_INACTIVE, any read of the event after the detach will increase the running count but not the enabled count and cause funny scaling artefacts. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20091123103819.689055515@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 1f14481..fb851ec 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -294,6 +294,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) if (event->group_leader != event) event->group_leader->nr_siblings--; + event->state = PERF_EVENT_STATE_OFF; + /* * If this was a group event with sibling events then * upgrade the siblings to singleton events by adding them -- cgit v0.10.2 From 5e942bb33371254a474653123cd9e13a4c89ee44 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:26 +0100 Subject: perf_events: Update the context time on exit It appeared we did call update_event_times() on exit, but we failed to update the context time, which renders the former moot. Locking is a bit iffy, we call update_event_times under ctx->mutex instead of ctx->lock - the next patch fixes this. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20091123103819.764207355@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fb851ec..8be2574 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4983,6 +4983,7 @@ void perf_event_exit_task(struct task_struct *child) * the events from it. */ unclone_ctx(child_ctx); + update_context_time(child_ctx); spin_unlock_irqrestore(&child_ctx->lock, flags); /* -- cgit v0.10.2 From f67218c3e93abaf0f480bb94b53d234853ffe4de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:27 +0100 Subject: perf_events: Fix __perf_event_exit_task() vs. update_event_times() locking Move the update_event_times() call in __perf_event_exit_task() into list_del_event() because that holds the proper lock (ctx->lock) and seems a more natural place to do the last time update. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20091123103819.842455480@chello.nl> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 8be2574..50f11b5 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -246,6 +246,44 @@ static void perf_unpin_context(struct perf_event_context *ctx) put_ctx(ctx); } +static inline u64 perf_clock(void) +{ + return cpu_clock(smp_processor_id()); +} + +/* + * Update the record of the current time in a context. + */ +static void update_context_time(struct perf_event_context *ctx) +{ + u64 now = perf_clock(); + + ctx->time += now - ctx->timestamp; + ctx->timestamp = now; +} + +/* + * Update the total_time_enabled and total_time_running fields for a event. + */ +static void update_event_times(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + u64 run_end; + + if (event->state < PERF_EVENT_STATE_INACTIVE || + event->group_leader->state < PERF_EVENT_STATE_INACTIVE) + return; + + event->total_time_enabled = ctx->time - event->tstamp_enabled; + + if (event->state == PERF_EVENT_STATE_INACTIVE) + run_end = event->tstamp_stopped; + else + run_end = ctx->time; + + event->total_time_running = run_end - event->tstamp_running; +} + /* * Add a event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. @@ -294,6 +332,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) if (event->group_leader != event) event->group_leader->nr_siblings--; + update_event_times(event); event->state = PERF_EVENT_STATE_OFF; /* @@ -454,44 +493,6 @@ retry: spin_unlock_irq(&ctx->lock); } -static inline u64 perf_clock(void) -{ - return cpu_clock(smp_processor_id()); -} - -/* - * Update the record of the current time in a context. - */ -static void update_context_time(struct perf_event_context *ctx) -{ - u64 now = perf_clock(); - - ctx->time += now - ctx->timestamp; - ctx->timestamp = now; -} - -/* - * Update the total_time_enabled and total_time_running fields for a event. - */ -static void update_event_times(struct perf_event *event) -{ - struct perf_event_context *ctx = event->ctx; - u64 run_end; - - if (event->state < PERF_EVENT_STATE_INACTIVE || - event->group_leader->state < PERF_EVENT_STATE_INACTIVE) - return; - - event->total_time_enabled = ctx->time - event->tstamp_enabled; - - if (event->state == PERF_EVENT_STATE_INACTIVE) - run_end = event->tstamp_stopped; - else - run_end = ctx->time; - - event->total_time_running = run_end - event->tstamp_running; -} - /* * Update total_time_enabled and total_time_running for all events in a group. */ @@ -4931,7 +4932,6 @@ __perf_event_exit_task(struct perf_event *child_event, { struct perf_event *parent_event; - update_event_times(child_event); perf_event_remove_from_context(child_event); parent_event = child_event->parent; -- cgit v0.10.2 From 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 11:37:29 +0100 Subject: perf_events: Undo some recursion damage Make perf_swevent_get_recursion_context return a context number and disable preemption. This could be used to remove the IRQ disable from the trace bit and index the per-cpu buffer with. Signed-off-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <20091123103819.993226816@chello.nl> Signed-off-by: Ingo Molnar diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 74e98b1..43adbd7 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -874,8 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); -extern int perf_swevent_get_recursion_context(int **recursion); -extern void perf_swevent_put_recursion_context(int *recursion); +extern int perf_swevent_get_recursion_context(void); +extern void perf_swevent_put_recursion_context(int rctx); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -904,8 +904,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static int perf_swevent_get_recursion_context(int **recursion) { return -1; } -static void perf_swevent_put_recursion_context(int *recursion) { } +static inline int perf_swevent_get_recursion_context(void) { return -1; } +static inline void perf_swevent_put_recursion_context(int rctx) { } #endif diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c222ef5..c3417c1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -724,8 +724,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int perf_swevent_get_recursion_context(int **recursion); \ - extern void perf_swevent_put_recursion_context(int *recursion); \ + extern int perf_swevent_get_recursion_context(void); \ + extern void perf_swevent_put_recursion_context(int rctx); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ @@ -736,8 +736,8 @@ static void ftrace_profile_##call(proto) \ int __data_size; \ char *trace_buf; \ char *raw_data; \ - int *recursion; \ int __cpu; \ + int rctx; \ int pc; \ \ pc = preempt_count(); \ @@ -753,8 +753,9 @@ static void ftrace_profile_##call(proto) \ \ local_irq_save(irq_flags); \ \ - if (perf_swevent_get_recursion_context(&recursion)) \ - goto end_recursion; \ + rctx = perf_swevent_get_recursion_context(); \ + if (rctx < 0) \ + goto end_recursion; \ \ __cpu = smp_processor_id(); \ \ @@ -781,9 +782,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end: \ - perf_swevent_put_recursion_context(recursion); \ -end_recursion: \ +end: \ + perf_swevent_put_recursion_context(rctx); \ +end_recursion: \ local_irq_restore(irq_flags); \ \ } diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 50f11b5..0b0d5f7 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3869,45 +3869,50 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, } } -/* - * Must be called with preemption disabled - */ -int perf_swevent_get_recursion_context(int **recursion) +int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int rctx; if (in_nmi()) - *recursion = &cpuctx->recursion[3]; + rctx = 3; else if (in_irq()) - *recursion = &cpuctx->recursion[2]; + rctx = 2; else if (in_softirq()) - *recursion = &cpuctx->recursion[1]; + rctx = 1; else - *recursion = &cpuctx->recursion[0]; + rctx = 0; - if (**recursion) + if (cpuctx->recursion[rctx]) { + put_cpu_var(perf_cpu_context); return -1; + } - (**recursion)++; + cpuctx->recursion[rctx]++; + barrier(); - return 0; + return rctx; } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); -void perf_swevent_put_recursion_context(int *recursion) +void perf_swevent_put_recursion_context(int rctx) { - (*recursion)--; + struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + barrier(); + cpuctx->recursion[rctx]++; + put_cpu_var(perf_cpu_context); } EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); -static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) +static void do_perf_sw_event(enum perf_type_id type, u32 event_id, + u64 nr, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { + struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + cpuctx = &__get_cpu_var(perf_cpu_context); rcu_read_lock(); perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, nr, nmi, data, regs); @@ -3921,34 +3926,22 @@ static void __do_perf_sw_event(enum perf_type_id type, u32 event_id, rcu_read_unlock(); } -static void do_perf_sw_event(enum perf_type_id type, u32 event_id, - u64 nr, int nmi, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - int *recursion; - - preempt_disable(); - - if (perf_swevent_get_recursion_context(&recursion)) - goto out; - - __do_perf_sw_event(type, event_id, nr, nmi, data, regs); - - perf_swevent_put_recursion_context(recursion); -out: - preempt_enable(); -} - void __perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { struct perf_sample_data data; + int rctx; + + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) + return; data.addr = addr; data.raw = NULL; do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); + + perf_swevent_put_recursion_context(rctx); } static void perf_swevent_read(struct perf_event *event) @@ -4172,7 +4165,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, regs = task_pt_regs(current); /* Trace events already protected against recursion */ - __do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, + do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } EXPORT_SYMBOL_GPL(perf_tp_event); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 22e6f68..79ce6a2 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1213,7 +1213,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, unsigned long irq_flags; char *trace_buf; char *raw_data; - int *recursion; + int rctx; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1229,7 +1229,8 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, */ local_irq_save(irq_flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; __cpu = smp_processor_id(); @@ -1258,7 +1259,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, perf_tp_event(call->id, entry->ip, 1, entry, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(irq_flags); @@ -1276,8 +1277,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, int size, __size, i, pc, __cpu; unsigned long irq_flags; char *trace_buf; - int *recursion; char *raw_data; + int rctx; pc = preempt_count(); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); @@ -1293,7 +1294,8 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, */ local_irq_save(irq_flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; __cpu = smp_processor_id(); @@ -1323,7 +1325,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, perf_tp_event(call->id, entry->ret_ip, 1, entry, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(irq_flags); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 41b6dd9..9189cbe 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -481,8 +481,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) unsigned long flags; char *trace_buf; char *raw_data; - int *recursion; int syscall_nr; + int rctx; int size; int cpu; @@ -506,7 +506,8 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; cpu = smp_processor_id(); @@ -530,7 +531,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) perf_tp_event(sys_data->enter_id, 0, 1, rec, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(flags); } @@ -582,7 +583,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) int syscall_nr; char *trace_buf; char *raw_data; - int *recursion; + int rctx; int size; int cpu; @@ -609,7 +610,8 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - if (perf_swevent_get_recursion_context(&recursion)) + rctx = perf_swevent_get_recursion_context(); + if (rctx < 0) goto end_recursion; cpu = smp_processor_id(); @@ -634,7 +636,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); end: - perf_swevent_put_recursion_context(recursion); + perf_swevent_put_recursion_context(rctx); end_recursion: local_irq_restore(flags); } -- cgit v0.10.2 From 9f800de38b05d84809e89f16671d636a140eede7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 12:45:25 +0100 Subject: x86/amd-iommu: un__init iommu_setup_msi This function may be called on the resume path and can not be dropped after booting. Cc: stable@kernel.org Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0d4581e..72bdbda 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -926,7 +926,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) * ****************************************************************************/ -static int __init iommu_setup_msi(struct amd_iommu *iommu) +static int iommu_setup_msi(struct amd_iommu *iommu) { int r; -- cgit v0.10.2 From be831297716036de5b24308447ecb69f1706a846 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 12:50:00 +0100 Subject: x86/amd-iommu: attach devices to pre-allocated domains early For some devices the ACPI table may define unity map requirements which must me met when the IOMMU is enabled. So we need to attach devices to their domains as early as possible so that these mappings are in place when needed. This patch assigns the domains right after they are allocated. Otherwise this can result in I/O page faults before a driver binds to a device and BIOS is still using it. Cc: stable@kernel.org Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 093bd52..b74b212 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -2047,10 +2047,10 @@ static void prealloc_protection_domains(void) struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; struct amd_iommu *iommu; - u16 devid; + u16 devid, __devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - devid = calc_devid(dev->bus->number, dev->devfn); + __devid = devid = calc_devid(dev->bus->number, dev->devfn); if (devid > amd_iommu_last_bdf) continue; devid = amd_iommu_alias_table[devid]; @@ -2065,6 +2065,10 @@ static void prealloc_protection_domains(void) init_unity_mappings_for_device(dma_dom, devid); dma_dom->target_dev = devid; + attach_device(iommu, &dma_dom->domain, devid); + if (__devid != devid) + attach_device(iommu, &dma_dom->domain, __devid); + list_add_tail(&dma_dom->list, &iommu_pd_list); } } -- cgit v0.10.2 From acd1d7c1f8f3d848a3c5327dc09f8c1efb971678 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Nov 2009 15:00:36 +0100 Subject: perf_events: Restore sanity to scaling land It is quite possible to call update_event_times() on a context that isn't actually running and thereby confuse the thing. perf stat was reporting !100% scale values for software counters (2e2af50b perf_events: Disable events when we detach them, solved the worst of that, but there was still some left). The thing that happens is that because we are not self-reaping (we have a caring parent) there is a time between the last schedule (out) and having do_exit() called which will detach the events. This period would be accounted as enabled,!running because the event->state==INACTIVE, even though !event->ctx->is_active. Similar issues could have been observed by calling read() on a event while the attached task was not scheduled in. Solve this by teaching update_event_times() about ctx->is_active. Signed-off-by: Peter Zijlstra Cc: Paul Mackerras Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1258984836.4531.480.camel@laptop> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 0b0d5f7..0aafe85 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -274,7 +274,12 @@ static void update_event_times(struct perf_event *event) event->group_leader->state < PERF_EVENT_STATE_INACTIVE) return; - event->total_time_enabled = ctx->time - event->tstamp_enabled; + if (ctx->is_active) + run_end = ctx->time; + else + run_end = event->tstamp_stopped; + + event->total_time_enabled = run_end - event->tstamp_enabled; if (event->state == PERF_EVENT_STATE_INACTIVE) run_end = event->tstamp_stopped; -- cgit v0.10.2 From ba6909b719a5ccc0c8100d2895bb7ff557b2eeae Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 23 Nov 2009 21:17:13 +0530 Subject: hw-breakpoint: Attribute authorship of hw-breakpoint related files Attribute authorship to developers of hw-breakpoint related files. Signed-off-by: K.Prasad Cc: Alan Stern Cc: Frederic Weisbecker LKML-Reference: <20091123154713.GA5593@in.ibm.com> [ v2: moved it to latest -tip ] Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 752daeb..4d267fb 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -16,6 +16,10 @@ * Copyright (C) 2007 Alan Stern * Copyright (C) 2009 IBM Corporation * Copyright (C) 2009 Frederic Weisbecker + * + * Authors: Alan Stern + * K.Prasad + * Frederic Weisbecker */ /* diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index b6d6fa2..c166622 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -18,6 +18,10 @@ * Copyright (C) 2009, Frederic Weisbecker * * Thanks to Ingo Molnar for his many suggestions. + * + * Authors: Alan Stern + * K.Prasad + * Frederic Weisbecker */ /* diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 5bc9819..9506381 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -23,6 +23,8 @@ * that variable. * * Copyright (C) IBM Corporation, 2009 + * + * Author: K.Prasad */ #include /* Needed by all modules */ #include /* Needed for KERN_INFO */ -- cgit v0.10.2 From e6db4876575f3fdd5b1df2cbff826df95ab9af6a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 15:42:32 +0100 Subject: hw-breakpoints: Include only linux/perf_event.h from kernel part of bp headers As userspace only needs the breakpoints enum types from the breakpoints headers. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258987355-8751-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 4659e0c..76a48ab 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,8 +1,6 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H -#include - enum { HW_BREAKPOINT_LEN_1 = 1, HW_BREAKPOINT_LEN_2 = 2, @@ -19,6 +17,8 @@ enum { #ifdef __KERNEL__ #ifdef CONFIG_HAVE_HW_BREAKPOINT +#include + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; -- cgit v0.10.2 From fdf6bc95229821e3d9405eba28925b76e92b74d0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 15:42:33 +0100 Subject: hw-breakpoints: Check the breakpoint params from perf tools Perf tools create perf events as disabled in the beginning. Breakpoints are then considered like ptrace temporary breakpoints, only meant to reserve a breakpoint slot until we get all the necessary informations from the user. In this case, we don't check the address that is breakpointed as it is NULL in the ptrace case. But perf tools don't have the same purpose, events are created disabled to wait for all events to be created before enabling all of them. We want to check the breakpoint parameters in this case. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258987355-8751-2-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c166622..06d372f 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -267,7 +267,16 @@ int __register_perf_hw_breakpoint(struct perf_event *bp) if (ret) return ret; - if (!bp->attr.disabled) + /* + * Ptrace breakpoints can be temporary perf events only + * meant to reserve a slot. In this case, it is created disabled and + * we don't want to check the params right now (as we put a null addr) + * But perf tools create events as disabled and we want to check + * the params for them. + * This is a quick hack that will be removed soon, once we remove + * the tmp breakpoints from ptrace + */ + if (!bp->attr.disabled || bp->callback == perf_bp_event) ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); return ret; -- cgit v0.10.2 From f5ffe02e5046003ae7e2ce70d3d1c2a73331268b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 15:42:34 +0100 Subject: perf: Add kernel side syscall events support for breakpoints Add the remaining necessary bits to support breakpoints created through perf syscall. We don't use the software counter interface as: - We don't need to check against recursion, this is already done in hardware breakpoints arch level. - We already know the perf event we are dealing with when the event is to be committed. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258987355-8751-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 0aafe85..9425c96 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3831,6 +3831,20 @@ static int perf_swevent_is_counting(struct perf_event *event) static int perf_tp_event_match(struct perf_event *event, struct perf_sample_data *data); +static int perf_exclude_event(struct perf_event *event, + struct pt_regs *regs) +{ + if (regs) { + if (event->attr.exclude_user && user_mode(regs)) + return 1; + + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + } + + return 0; +} + static int perf_swevent_match(struct perf_event *event, enum perf_type_id type, u32 event_id, @@ -3842,16 +3856,12 @@ static int perf_swevent_match(struct perf_event *event, if (event->attr.type != type) return 0; + if (event->attr.config != event_id) return 0; - if (regs) { - if (event->attr.exclude_user && user_mode(regs)) - return 0; - - if (event->attr.exclude_kernel && !user_mode(regs)) - return 0; - } + if (perf_exclude_event(event, regs)) + return 0; if (event->attr.type == PERF_TYPE_TRACEPOINT && !perf_tp_event_match(event, data)) @@ -4282,9 +4292,15 @@ static const struct pmu *bp_perf_event_init(struct perf_event *bp) return &perf_ops_bp; } -void perf_bp_event(struct perf_event *bp, void *regs) +void perf_bp_event(struct perf_event *bp, void *data) { - /* TODO */ + struct perf_sample_data sample; + struct pt_regs *regs = data; + + sample.addr = bp->attr.bp_addr; + + if (!perf_exclude_event(bp, regs)) + perf_swevent_add(bp, 1, 1, &sample, regs); } #else static void bp_perf_event_destroy(struct perf_event *event) -- cgit v0.10.2 From 1b290d670ffa883b7e062177463a8efd00eaa2c1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 15:42:35 +0100 Subject: perf tools: Add support for breakpoint events in perf tools Add the breakpoint events support with this new sysnopsis: mem:addr[:access] Where addr is a raw addr value in the kernel and access can be either [r][w][x] Example to profile tasklist_lock: $ grep tasklist_lock /proc/kallsyms ffffffff8189c000 D tasklist_lock $ perf record -e mem:0xffffffff8189c000:rw -a -f -c 1 $ perf report # Samples: 62 # # Overhead Command Shared Object Symbol # ........ ............... ............. ...... # 29.03% swapper [kernel] [k] _raw_read_trylock 29.03% swapper [kernel] [k] _raw_read_unlock 19.35% init [kernel] [k] _raw_read_trylock 19.35% init [kernel] [k] _raw_read_unlock 1.61% events/0 [kernel] [k] _raw_read_trylock 1.61% events/0 [kernel] [k] _raw_read_unlock Coming soon: - Support for symbols in the event definition. - Default period to 1 for breakpoint events because these are not high frequency events. The same thing is needed for trace events. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1258987355-8751-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 0ff23de..fc46c0b 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -26,11 +26,19 @@ OPTIONS -e:: --event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + Select the PMU event. Selection can be: + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a hardware breakpoint event in the form of '\mem:addr[:access]' + where addr is the address in memory you want to break in. + Access is the memory access type (read, write, execute) it can + be passed as follows: '\mem:addr[:[r][w][x]]'. + If you want to profile read-write accesses in 0x1000, just set + 'mem:0x1000:rw'. -a:: System-wide collection. diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 0faf4f2..0700274 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,4 +1,4 @@ - +#include "../../../include/linux/hw_breakpoint.h" #include "util.h" #include "../perf.h" #include "parse-options.h" @@ -540,6 +540,81 @@ static enum event_result parse_tracepoint_event(const char **strp, attr, strp); } +static enum event_result +parse_breakpoint_type(const char *type, const char **strp, + struct perf_event_attr *attr) +{ + int i; + + for (i = 0; i < 3; i++) { + if (!type[i]) + break; + + switch (type[i]) { + case 'r': + attr->bp_type |= HW_BREAKPOINT_R; + break; + case 'w': + attr->bp_type |= HW_BREAKPOINT_W; + break; + case 'x': + attr->bp_type |= HW_BREAKPOINT_X; + break; + default: + return EVT_FAILED; + } + } + if (!attr->bp_type) /* Default */ + attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; + + *strp = type + i; + + return EVT_HANDLED; +} + +static enum event_result +parse_breakpoint_event(const char **strp, struct perf_event_attr *attr) +{ + const char *target; + const char *type; + char *endaddr; + u64 addr; + enum event_result err; + + target = strchr(*strp, ':'); + if (!target) + return EVT_FAILED; + + if (strncmp(*strp, "mem", target - *strp) != 0) + return EVT_FAILED; + + target++; + + addr = strtoull(target, &endaddr, 0); + if (target == endaddr) + return EVT_FAILED; + + attr->bp_addr = addr; + *strp = endaddr; + + type = strchr(target, ':'); + + /* If no type is defined, just rw as default */ + if (!type) { + attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; + } else { + err = parse_breakpoint_type(++type, strp, attr); + if (err == EVT_FAILED) + return EVT_FAILED; + } + + /* We should find a nice way to override the access type */ + attr->bp_len = HW_BREAKPOINT_LEN_4; + attr->type = PERF_TYPE_BREAKPOINT; + + return EVT_HANDLED; +} + static int check_events(const char *str, unsigned int i) { int n; @@ -673,6 +748,10 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr) if (ret != EVT_FAILED) goto modifier; + ret = parse_breakpoint_event(str, attr); + if (ret != EVT_FAILED) + goto modifier; + fprintf(stderr, "invalid or unsupported event: '%s'\n", *str); fprintf(stderr, "Run 'perf list' for a list of valid events\n"); return EVT_FAILED; @@ -859,6 +938,9 @@ void print_events(void) "rNNN"); printf("\n"); + printf(" %-42s [hardware breakpoint]\n", "mem:[:access]"); + printf("\n"); + print_tracepoint_events(); exit(129); -- cgit v0.10.2 From 429947248f814e90f416ab4f68a871ab628000c3 Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Fri, 20 Nov 2009 17:40:37 +0100 Subject: sched_feat_write(): Update ppos instead of file->f_pos sched_feat_write() should update ppos instead of file->f_pos. (This reduces some BKL dependencies of this code.) Signed-off-by: Jan Blunck Cc: jkacur@redhat.com Cc: Arnd Bergmann Cc: Frederic Weisbecker Cc: Jamie Lokier Cc: Peter Zijlstra Cc: Christoph Hellwig Cc: Alan Cox LKML-Reference: <1258735245-25826-8-git-send-email-jblunck@suse.de> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index ab9a034..93474a7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -771,7 +771,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, if (!sched_feat_names[i]) return -EINVAL; - filp->f_pos += cnt; + *ppos += cnt; return cnt; } -- cgit v0.10.2 From 0444c9bd0cf4e0eb946a7fcaf34765accfa9404a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 20 Nov 2009 14:03:05 +0000 Subject: x86: Tighten conditionals on MCE related statistics irq_thermal_count is only being maintained when X86_THERMAL_VECTOR, and both X86_THERMAL_VECTOR and X86_MCE_THRESHOLD don't need extra wrapping in X86_MCE conditionals. Signed-off-by: Jan Beulich Cc: Hidetoshi Seto Cc: Yong Wang Cc: Suresh Siddha Cc: Andi Kleen Cc: Borislav Petkov Cc: Arjan van de Ven LKML-Reference: <4B06AFA902000078000211F8@vpn.id2.novell.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f..108eb6f 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -20,11 +20,11 @@ typedef struct { unsigned int irq_call_count; unsigned int irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD unsigned int irq_threshold_count; -# endif #endif } ____cacheline_aligned irq_cpustat_t; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd52..19212cb 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); seq_printf(p, " TLB shootdowns\n"); #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR seq_printf(p, "%*s: ", prec, "TRM"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); seq_printf(p, " Thermal event interrupts\n"); -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD seq_printf(p, "%*s: ", prec, "THR"); for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); seq_printf(p, " Threshold APIC interrupts\n"); -# endif #endif #ifdef CONFIG_X86_MCE seq_printf(p, "%*s: ", prec, "MCE"); @@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += irq_stats(cpu)->irq_call_count; sum += irq_stats(cpu)->irq_tlb_count; #endif -#ifdef CONFIG_X86_MCE +#ifdef CONFIG_X86_THERMAL_VECTOR sum += irq_stats(cpu)->irq_thermal_count; -# ifdef CONFIG_X86_MCE_THRESHOLD +#endif +#ifdef CONFIG_X86_MCE_THRESHOLD sum += irq_stats(cpu)->irq_threshold_count; -# endif #endif #ifdef CONFIG_X86_MCE sum += per_cpu(mce_exception_count, cpu); -- cgit v0.10.2 From cc612d8199089413719397c9d92e5823da578eac Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Nov 2009 16:39:10 -0200 Subject: perf symbols: Look for vmlinux in more places MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we can check the buildid to see if it really matches, this can be done safely: vmlinux /boot/vmlinux /boot/vmlinux- /lib/modules//build/vmlinux /usr/lib/debug/lib/modules/%s/vmlinux More can be added - if you know about distros that put the vmlinux somewhere else please let us know. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259001550-8194-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2031527..6b13a1e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -37,6 +37,7 @@ static bool use_modules; static unsigned long page_size; static unsigned long mmap_window = 32; +const char *vmlinux_name; struct sym_hist { u64 sum; @@ -637,7 +638,7 @@ static int __cmd_annotate(void) exit(0); } - if (kernel_maps__init(use_modules) < 0) { + if (kernel_maps__init(vmlinux_name, true, use_modules) < 0) { pr_err("failed to create kernel maps for symbol resolution\b"); return -1; } diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 4145049..5d8aeae 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -291,7 +291,7 @@ static int read_events(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, 0, 0, + return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, &cwdlen, &cwd); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7e690f7..fe474b7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -52,6 +52,7 @@ static char *pretty_printing_style = default_pretty_printing_style; static int exclude_other = 1; static char callchain_default_opt[] = "fractal,0.5"; +const char *vmlinux_name; static char *cwd; static int cwdlen; @@ -925,8 +926,9 @@ static int __cmd_report(void) register_perf_file_handler(&file_handler); - ret = mmap_dispatch_perf_file(&header, input_name, force, full_paths, - &cwdlen, &cwd); + ret = mmap_dispatch_perf_file(&header, input_name, vmlinux_name, + !vmlinux_name, force, + full_paths, &cwdlen, &cwd); if (ret) return ret; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index df44b75..260f57a 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1718,7 +1718,8 @@ static int read_events(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); + return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, + &cwdlen, &cwd); } static void print_bad_events(void) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ea49c2e..eef9caa 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -79,6 +79,7 @@ static int dump_symtab = 0; static bool hide_kernel_symbols = false; static bool hide_user_symbols = false; static struct winsize winsize; +const char *vmlinux_name; static const char *graph_line = "_____________________________________________________________________" "_____________________________________________________________________"; @@ -1341,7 +1342,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (delay_secs < 1) delay_secs = 1; - err = kernel_maps__init(true); + err = kernel_maps__init(vmlinux_name, !vmlinux_name, true); if (err < 0) return err; parse_source(sym_filter_entry); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d042d65..b71198e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -131,7 +131,8 @@ static int __cmd_trace(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); + return mmap_dispatch_perf_file(&header, input_name, NULL, false, + 0, 0, &cwdlen, &cwd); } static const char * const annotate_usage[] = { diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index e7b6c2b..f318d19 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -101,6 +101,8 @@ out: int mmap_dispatch_perf_file(struct perf_header **pheader, const char *input_name, + const char *vmlinux_name, + bool try_vmlinux_path, int force, int full_paths, int *cwdlen, @@ -171,7 +173,7 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, goto out_delete; err = -ENOMEM; - if (kernel_maps__init(true) < 0) { + if (kernel_maps__init(vmlinux_name, try_vmlinux_path, true) < 0) { pr_err("failed to setup the kernel maps to resolve symbols\n"); goto out_delete; } diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h index ae036ec..3f0d21b 100644 --- a/tools/perf/util/data_map.h +++ b/tools/perf/util/data_map.h @@ -23,6 +23,8 @@ struct perf_file_handler { void register_perf_file_handler(struct perf_file_handler *handler); int mmap_dispatch_perf_file(struct perf_header **pheader, const char *input_name, + const char *vmlinux_name, + bool try_vmlinux_path, int force, int full_paths, int *cwdlen, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ac3410b..1332f8e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -257,7 +257,7 @@ static int perf_header__adds_write(struct perf_header *self, int fd) * Read the kernel buildid nad the list of loaded modules with * its build_ids: */ - kernel_maps__init(true); + kernel_maps__init(NULL, false, true); /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 74b5b8a..44d81d5 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -34,6 +34,8 @@ static void kernel_maps__insert(struct map *map); static int dso__load_kernel_sym(struct dso *self, struct map *map, symbol_filter_t filter); unsigned int symbol__priv_size; +static int vmlinux_path__nr_entries; +static char **vmlinux_path; static struct rb_root kernel_maps; @@ -1386,15 +1388,43 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, static int dso__load_kernel_sym(struct dso *self, struct map *map, symbol_filter_t filter) { - int err = dso__load_vmlinux(self, map, self->name, filter); + int err; + bool is_kallsyms; + + if (vmlinux_path != NULL) { + int i; + pr_debug("Looking at the vmlinux_path (%d entries long)\n", + vmlinux_path__nr_entries); + for (i = 0; i < vmlinux_path__nr_entries; ++i) { + err = dso__load_vmlinux(self, map, vmlinux_path[i], + filter); + if (err > 0) { + pr_debug("Using %s for symbols\n", + vmlinux_path[i]); + dso__set_long_name(self, + strdup(vmlinux_path[i])); + goto out_fixup; + } + } + } + + is_kallsyms = self->long_name[0] == '['; + if (is_kallsyms) + goto do_kallsyms; + err = dso__load_vmlinux(self, map, self->long_name, filter); if (err <= 0) { + pr_info("The file %s cannot be used, " + "trying to use /proc/kallsyms...", self->long_name); + sleep(2); +do_kallsyms: err = kernel_maps__load_kallsyms(filter); - if (err > 0) + if (err > 0 && !is_kallsyms) dso__set_long_name(self, strdup("[kernel.kallsyms]")); } if (err > 0) { +out_fixup: map__fixup_start(map); map__fixup_end(map); } @@ -1403,9 +1433,7 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, } LIST_HEAD(dsos); -struct dso *vdso; - -const char *vmlinux_name = "vmlinux"; +struct dso *vdso; static void dsos__add(struct dso *dso) { @@ -1457,9 +1485,9 @@ size_t dsos__fprintf_buildid(FILE *fp) return ret; } -static int kernel_maps__create_kernel_map(void) +static int kernel_maps__create_kernel_map(const char *vmlinux_name) { - struct dso *kernel = dso__new(vmlinux_name); + struct dso *kernel = dso__new(vmlinux_name ?: "[kernel.kallsyms]"); if (kernel == NULL) return -1; @@ -1468,10 +1496,10 @@ static int kernel_maps__create_kernel_map(void) if (kernel_map == NULL) goto out_delete_kernel_dso; - kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; + kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; + kernel->short_name = "[kernel]"; + kernel->kernel = 1; - kernel->short_name = "[kernel]"; - kernel->kernel = 1; vdso = dso__new("[vdso]"); if (vdso == NULL) goto out_delete_kernel_map; @@ -1494,11 +1522,72 @@ out_delete_kernel_dso: return -1; } -int kernel_maps__init(bool use_modules) +static void vmlinux_path__exit(void) +{ + while (--vmlinux_path__nr_entries >= 0) { + free(vmlinux_path[vmlinux_path__nr_entries]); + vmlinux_path[vmlinux_path__nr_entries] = NULL; + } + + free(vmlinux_path); + vmlinux_path = NULL; +} + +static int vmlinux_path__init(void) +{ + struct utsname uts; + char bf[PATH_MAX]; + + if (uname(&uts) < 0) + return -1; + + vmlinux_path = malloc(sizeof(char *) * 5); + if (vmlinux_path == NULL) + return -1; + + vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux"); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux"); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release); + vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", uts.release); + vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux", + uts.release); + vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + goto out_fail; + ++vmlinux_path__nr_entries; + + return 0; + +out_fail: + vmlinux_path__exit(); + return -1; +} + +int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, + bool use_modules) { - if (kernel_maps__create_kernel_map() < 0) + if (try_vmlinux_path && vmlinux_path__init() < 0) return -1; + if (kernel_maps__create_kernel_map(vmlinux_name) < 0) { + vmlinux_path__exit(); + return -1; + } + if (use_modules && kernel_maps__create_module_maps() < 0) pr_debug("Failed to load list of modules in use, " "continuing...\n"); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 7a12904..8c4d026 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -93,7 +93,8 @@ int sysfs__read_build_id(const char *filename, void *bf, size_t size); bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); -int kernel_maps__init(bool use_modules); +int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, + bool use_modules); size_t kernel_maps__fprintf(FILE *fp); void symbol__init(unsigned int priv_size); @@ -101,5 +102,4 @@ void symbol__init(unsigned int priv_size); extern struct list_head dsos; extern struct map *kernel_map; extern struct dso *vdso; -extern const char *vmlinux_name; #endif /* __PERF_SYMBOL */ -- cgit v0.10.2 From 27c13ecec4d8856687b50b959e1146845b478f95 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 21 Nov 2009 14:01:45 +0100 Subject: x86, cpu: mv display_cacheinfo -> cpu_detect_cache_sizes display_cacheinfo() doesn't display anything anymore and it is used to detect CPU cache sizes. Rename it accordingly. Signed-off-by: Borislav Petkov LKML-Reference: <20091121130145.GA31357@liondog.tnic> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a71..7128b37 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -535,7 +535,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831..e58d978 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); } - display_cacheinfo(c); + cpu_detect_cache_sizes(c); } enum { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 61242a5..9bf845d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) static void __cpuinit default_init(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_64 - display_cacheinfo(c); + cpu_detect_cache_sizes(c); #else /* Not much we can do here... */ /* Check if at least it has cpuid */ @@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) } } -void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a90..3624e8a 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -32,6 +32,6 @@ struct cpu_dev { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; -extern void display_cacheinfo(struct cpuinfo_x86 *c); +extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b8..4fbd384 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) /* Handle the GX (Formally known as the GX2) */ if (c->x86 == 5 && c->x86_model == 5) - display_cacheinfo(c); + cpu_detect_cache_sizes(c); else init_cyrix(c); } diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e..2800074 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) early_init_transmeta(c); - display_cacheinfo(c); + cpu_detect_cache_sizes(c); /* Print CMS and CPU revision */ max = cpuid_eax(0x80860000); -- cgit v0.10.2 From 2890284bcf5c13c10fae8a0c20ad2f575118a092 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Nov 2009 17:51:08 -0200 Subject: perf tools: Move graph_line and graph_dotted_line from top MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that they can be used in other tools. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259005869-13487-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index eef9caa..6a5de90 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -80,13 +80,6 @@ static bool hide_kernel_symbols = false; static bool hide_user_symbols = false; static struct winsize winsize; const char *vmlinux_name; -static const char *graph_line = - "_____________________________________________________________________" - "_____________________________________________________________________"; -static const char *graph_dotted_line = - "---------------------------------------------------------------------" - "---------------------------------------------------------------------" - "---------------------------------------------------------------------"; /* * Source diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index 0b791bd..3507362 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -29,3 +29,11 @@ unsigned char sane_ctype[256] = { A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ /* Nothing in the 128.. range */ }; + +const char *graph_line = + "_____________________________________________________________________" + "_____________________________________________________________________"; +const char *graph_dotted_line = + "---------------------------------------------------------------------" + "---------------------------------------------------------------------" + "---------------------------------------------------------------------"; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index f2203a0..e1c623e 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -84,6 +84,9 @@ #include #endif +extern const char *graph_line; +extern const char *graph_dotted_line; + /* On most systems would have given us this, but * not on some systems (e.g. GNU/Hurd). */ -- cgit v0.10.2 From 1b145ae58035f30353d78d25bea665091df9b438 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 Nov 2009 17:51:09 -0200 Subject: perf kmem: Resolve symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E.g.: [root@doppio linux-2.6-tip]# perf kmem record sleep 3s [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.804 MB perf.data (~35105 samples) ] [root@doppio linux-2.6-tip]# perf kmem --stat caller | head -10 ------------------------------------------------------------------------------ Callsite |Total_alloc/Per | Total_req/Per | Hit | Frag ------------------------------------------------------------------------------ getname/40 | 1519616/4096 | 1519616/4096 | 371| 0.000% seq_read/a2 | 987136/4096 | 987136/4096 | 241| 0.000% __netdev_alloc_skb/43 | 260368/1049 | 259968/1048 | 248| 0.154% __alloc_skb/5a | 77312/256 | 77312/256 | 302| 0.000% proc_alloc_inode/33 | 76480/632 | 76472/632 | 121| 0.010% get_empty_filp/8d | 70272/192 | 70272/192 | 366| 0.000% split_vma/8e | 42064/176 | 42064/176 | 239| 0.000% [root@doppio linux-2.6-tip]# Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu Cc: Frédéric Weisbecker Cc: linux-mm@kvack.org Cc: Li Zefan Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Steven Rostedt LKML-Reference: <1259005869-13487-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 5d8aeae..256d18f 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -307,25 +307,34 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) { struct rb_node *next; - printf("\n ------------------------------------------------------------------------------\n"); - if (is_caller) - printf(" Callsite |"); - else - printf(" Alloc Ptr |"); - printf(" Total_alloc/Per | Total_req/Per | Hit | Fragmentation\n"); - printf(" ------------------------------------------------------------------------------\n"); + printf("%.78s\n", graph_dotted_line); + printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr"); + printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n"); + printf("%.78s\n", graph_dotted_line); next = rb_first(root); while (next && n_lines--) { - struct alloc_stat *data; - - data = rb_entry(next, struct alloc_stat, node); + struct alloc_stat *data = rb_entry(next, struct alloc_stat, + node); + struct symbol *sym = NULL; + char bf[BUFSIZ]; + u64 addr; + + if (is_caller) { + addr = data->call_site; + sym = kernel_maps__find_symbol(addr, NULL, NULL); + } else + addr = data->ptr; + + if (sym != NULL) + snprintf(bf, sizeof(bf), "%s/%Lx", sym->name, + addr - sym->start); + else + snprintf(bf, sizeof(bf), "%#Lx", addr); - printf(" %-16p | %8llu/%-6lu | %8llu/%-6lu | %6lu | %8.3f%%\n", - is_caller ? (void *)(unsigned long)data->call_site : - (void *)(unsigned long)data->ptr, - (unsigned long long)data->bytes_alloc, + printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n", + bf, (unsigned long long)data->bytes_alloc, (unsigned long)data->bytes_alloc / data->hit, (unsigned long long)data->bytes_req, (unsigned long)data->bytes_req / data->hit, -- cgit v0.10.2 From fe542cf59bf0b31afe72b9e9749c0f6645419fa0 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 22 Nov 2009 11:49:55 +0900 Subject: LSM: Move security_path_chmod()/security_path_chown() to after mutex_lock(). We should call security_path_chmod()/security_path_chown() after mutex_lock() in order to avoid races. Signed-off-by: Tetsuo Handa Acked-by: John Johansen Signed-off-by: James Morris diff --git a/fs/open.c b/fs/open.c index 201041d..b4b31d2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -619,17 +619,17 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) err = mnt_want_write_file(file); if (err) goto out_putf; + mutex_lock(&inode->i_mutex); err = security_path_chmod(dentry, file->f_vfsmnt, mode); if (err) - goto out_drop_write; - mutex_lock(&inode->i_mutex); + goto out_unlock; if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); +out_unlock: mutex_unlock(&inode->i_mutex); -out_drop_write: mnt_drop_write(file->f_path.mnt); out_putf: fput(file); @@ -652,17 +652,17 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) error = mnt_want_write(path.mnt); if (error) goto dput_and_out; + mutex_lock(&inode->i_mutex); error = security_path_chmod(path.dentry, path.mnt, mode); if (error) - goto out_drop_write; - mutex_lock(&inode->i_mutex); + goto out_unlock; if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; error = notify_change(path.dentry, &newattrs); +out_unlock: mutex_unlock(&inode->i_mutex); -out_drop_write: mnt_drop_write(path.mnt); dput_and_out: path_put(&path); @@ -675,9 +675,9 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode) return sys_fchmodat(AT_FDCWD, filename, mode); } -static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +static int chown_common(struct path *path, uid_t user, gid_t group) { - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; int error; struct iattr newattrs; @@ -694,7 +694,9 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) newattrs.ia_valid |= ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; mutex_lock(&inode->i_mutex); - error = notify_change(dentry, &newattrs); + error = security_path_chown(path, user, group); + if (!error) + error = notify_change(path->dentry, &newattrs); mutex_unlock(&inode->i_mutex); return error; @@ -711,9 +713,7 @@ SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) error = mnt_want_write(path.mnt); if (error) goto out_release; - error = security_path_chown(&path, user, group); - if (!error) - error = chown_common(path.dentry, user, group); + error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -738,9 +738,7 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, error = mnt_want_write(path.mnt); if (error) goto out_release; - error = security_path_chown(&path, user, group); - if (!error) - error = chown_common(path.dentry, user, group); + error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -759,9 +757,7 @@ SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group error = mnt_want_write(path.mnt); if (error) goto out_release; - error = security_path_chown(&path, user, group); - if (!error) - error = chown_common(path.dentry, user, group); + error = chown_common(&path, user, group); mnt_drop_write(path.mnt); out_release: path_put(&path); @@ -784,9 +780,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) goto out_fput; dentry = file->f_path.dentry; audit_inode(NULL, dentry); - error = security_path_chown(&file->f_path, user, group); - if (!error) - error = chown_common(dentry, user, group); + error = chown_common(&file->f_path, user, group); mnt_drop_write(file->f_path.mnt); out_fput: fput(file); -- cgit v0.10.2 From c4a5af54c8ef277a59189fc9358e190f3c1b8206 Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Mon, 23 Nov 2009 04:57:52 +0000 Subject: Silence the existing API for capability version compatibility check. When libcap, or other libraries attempt to confirm/determine the supported capability version magic, they generally supply a NULL dataptr to capget(). In this case, while returning the supported/preferred magic (via a modified header content), the return code of this system call may be 0, -EINVAL, or -EFAULT. No libcap code depends on the previous -EINVAL etc. return code, and all of the above three return codes can accompany a valid (successful) attempt to determine the requested magic value. This patch cleans up the system call to return 0, if the call is successfully being used to determine the supported/preferred capability magic value. Signed-off-by: Andrew G. Morgan Acked-by: Steve Grubb Acked-by: Serge Hallyn Signed-off-by: James Morris diff --git a/kernel/capability.c b/kernel/capability.c index c2316d3..c450375 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -169,8 +169,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) kernel_cap_t pE, pI, pP; ret = cap_validate_magic(header, &tocopy); - if (ret != 0) - return ret; + if ((dataptr == NULL) || (ret != 0)) + return ((dataptr == NULL) && (ret == -EINVAL)) ? 0 : ret; if (get_user(pid, &header->pid)) return -EFAULT; -- cgit v0.10.2 From fa7c27ee9394fc0d52404b2a89882e95868a60b9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 23 Nov 2009 22:30:12 +0100 Subject: hw-breakpoints: Fix misordered ifdef Fix a misplaced ifdef. We need the perf event headers also in off-case to avoid the following build error: include/linux/hw_breakpoint.h:94: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:102: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:109: error: expected declaration specifiers or '...' before 'perf_callback_t' include/linux/hw_breakpoint.h:116: error: expected declaration specifiers or '...' before 'perf_callback_t' Reported-by: Kisskb-bot by Michael Ellerman Signed-off-by: Frederic Weisbecker Cc: Prasad LKML-Reference: <1259011812-8093-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 76a48ab..c9f7f7c 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -15,10 +15,11 @@ enum { }; #ifdef __KERNEL__ -#ifdef CONFIG_HAVE_HW_BREAKPOINT #include +#ifdef CONFIG_HAVE_HW_BREAKPOINT + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; -- cgit v0.10.2 From 0bce95279909aa4cc401a2e3140b4295ca22e72a Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 23 Nov 2009 16:47:23 -0500 Subject: SELinux: print denials for buggy kernel with unknown perms Historically we've seen cases where permissions are requested for classes where they do not exist. In particular we have seen CIFS forget to set i_mode to indicate it is a directory so when we later check something like remove_name we have problems since it wasn't defined in tclass file. This used to result in a avc which included the permission 0x2000 or something. Currently the kernel will deny the operations (good thing) but will not print ANY information (bad thing). First the auditdeny field is no extended to include unknown permissions. After that is fixed the logic in avc_dump_query to output this information isn't right since it will remove the permission from the av and print the phrase "". This takes us back to the behavior before the classmap rewrite. Signed-off-by: Eric Paris Signed-off-by: James Morris diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 18f4103..f2dde26 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -117,7 +117,7 @@ static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) i = 0; perm = 1; while (i < (sizeof(av) * 8)) { - if (perm & av) { + if ((perm & av) && perms[i]) { audit_log_format(ab, " %s", perms[i]); av &= ~perm; } diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 77f6e54..d6bb20c 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -239,6 +239,13 @@ static void map_decision(u16 tclass, struct av_decision *avd, if (!allow_unknown && !current_mapping[tclass].perms[i]) result |= 1<auditdeny = result; } } -- cgit v0.10.2 From b3a222e52e4d4be77cc4520a57af1a4a0d8222d1 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 23 Nov 2009 16:21:30 -0600 Subject: remove CONFIG_SECURITY_FILE_CAPABILITIES compile option As far as I know, all distros currently ship kernels with default CONFIG_SECURITY_FILE_CAPABILITIES=y. Since having the option on leaves a 'no_file_caps' option to boot without file capabilities, the main reason to keep the option is that turning it off saves you (on my s390x partition) 5k. In particular, vmlinux sizes came to: without patch fscaps=n: 53598392 without patch fscaps=y: 53603406 with this patch applied: 53603342 with the security-next tree. Against this we must weigh the fact that there is no simple way for userspace to figure out whether file capabilities are supported, while things like per-process securebits, capability bounding sets, and adding bits to pI if CAP_SETPCAP is in pE are not supported with SECURITY_FILE_CAPABILITIES=n, leaving a bit of a problem for applications wanting to know whether they can use them and/or why something failed. It also adds another subtly different set of semantics which we must maintain at the risk of severe security regressions. So this patch removes the SECURITY_FILE_CAPABILITIES compile option. It drops the kernel size by about 50k over the stock SECURITY_FILE_CAPABILITIES=y kernel, by removing the cap_limit_ptraced_target() function. Changelog: Nov 20: remove cap_limit_ptraced_target() as it's logic was ifndef'ed. Signed-off-by: Serge E. Hallyn Acked-by: Andrew G. Morgan" Signed-off-by: James Morris diff --git a/include/linux/capability.h b/include/linux/capability.h index c8f2a5f7..39e5ff5 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -92,9 +92,7 @@ struct vfs_cap_data { #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 #define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES extern int file_caps_enabled; -#endif typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 21a6f5d..8d10aa7 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,16 +83,12 @@ extern struct group_info init_groups; #define INIT_IDS #endif -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* * Because of the reduced scope of CAP_SETPCAP when filesystem * capabilities are in effect, it is safe to allow CAP_SETPCAP to * be available in the default configuration. */ # define CAP_INIT_BSET CAP_FULL_SET -#else -# define CAP_INIT_BSET CAP_INIT_EFF_SET -#endif #ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ diff --git a/kernel/capability.c b/kernel/capability.c index c450375..7f876e6 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -29,7 +29,6 @@ EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); EXPORT_SYMBOL(__cap_init_eff_set); -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES int file_caps_enabled = 1; static int __init file_caps_disable(char *str) @@ -38,7 +37,6 @@ static int __init file_caps_disable(char *str) return 1; } __setup("no_file_caps", file_caps_disable); -#endif /* * More recent versions of libcap are available from: diff --git a/security/Kconfig b/security/Kconfig index 95cc089..226b955 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -91,15 +91,6 @@ config SECURITY_PATH implement pathname based access controls. If you are unsure how to answer this question, answer N. -config SECURITY_FILE_CAPABILITIES - bool "File POSIX Capabilities" - default n - help - This enables filesystem capabilities, allowing you to give - binaries a subset of root's powers without using setuid 0. - - If in doubt, answer N. - config INTEL_TXT bool "Enable Intel(R) Trusted Execution Technology (Intel(R) TXT)" depends on HAVE_INTEL_TXT diff --git a/security/commoncap.c b/security/commoncap.c index 45b87af..f800fdb 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -173,7 +173,6 @@ int cap_capget(struct task_struct *target, kernel_cap_t *effective, */ static inline int cap_inh_is_capped(void) { -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* they are so limited unless the current task has the CAP_SETPCAP * capability @@ -181,7 +180,6 @@ static inline int cap_inh_is_capped(void) if (cap_capable(current, current_cred(), CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0) return 0; -#endif return 1; } @@ -239,8 +237,6 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm) bprm->cap_effective = false; } -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES - /** * cap_inode_need_killpriv - Determine if inode change affects privileges * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV @@ -421,49 +417,6 @@ out: return rc; } -#else -int cap_inode_need_killpriv(struct dentry *dentry) -{ - return 0; -} - -int cap_inode_killpriv(struct dentry *dentry) -{ - return 0; -} - -int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) -{ - memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); - return -ENODATA; -} - -static inline int get_file_caps(struct linux_binprm *bprm, bool *effective) -{ - bprm_clear_caps(bprm); - return 0; -} -#endif - -/* - * Determine whether a exec'ing process's new permitted capabilities should be - * limited to just what it already has. - * - * This prevents processes that are being ptraced from gaining access to - * CAP_SETPCAP, unless the process they're tracing already has it, and the - * binary they're executing has filecaps that elevate it. - * - * Returns 1 if they should be limited, 0 if they are not. - */ -static inline int cap_limit_ptraced_target(void) -{ -#ifndef CONFIG_SECURITY_FILE_CAPABILITIES - if (capable(CAP_SETPCAP)) - return 0; -#endif - return 1; -} - /** * cap_bprm_set_creds - Set up the proposed credentials for execve(). * @bprm: The execution parameters, including the proposed creds @@ -523,9 +476,8 @@ skip: new->euid = new->uid; new->egid = new->gid; } - if (cap_limit_ptraced_target()) - new->cap_permitted = cap_intersect(new->cap_permitted, - old->cap_permitted); + new->cap_permitted = cap_intersect(new->cap_permitted, + old->cap_permitted); } new->suid = new->fsuid = new->euid; @@ -739,7 +691,6 @@ int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) return 0; } -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES /* * Rationale: code calling task_setscheduler, task_setioprio, and * task_setnice, assumes that @@ -820,22 +771,6 @@ static long cap_prctl_drop(struct cred *new, unsigned long cap) return 0; } -#else -int cap_task_setscheduler (struct task_struct *p, int policy, - struct sched_param *lp) -{ - return 0; -} -int cap_task_setioprio (struct task_struct *p, int ioprio) -{ - return 0; -} -int cap_task_setnice (struct task_struct *p, int nice) -{ - return 0; -} -#endif - /** * cap_task_prctl - Implement process control functions for this security module * @option: The process control function requested @@ -866,7 +801,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, error = !!cap_raised(new->cap_bset, arg2); goto no_change; -#ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: error = cap_prctl_drop(new, arg2); if (error < 0) @@ -917,8 +851,6 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, error = new->securebits; goto no_change; -#endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ - case PR_GET_KEEPCAPS: if (issecure(SECURE_KEEP_CAPS)) error = 1; -- cgit v0.10.2 From ee3d250446f1c1be4eceab48f3a23794d9a6564c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 24 Nov 2009 15:19:43 +1100 Subject: perf tools: Fix compilation on powerpc Currently, perf fails to compile on powerpc with this error: CC util/header.o In file included from util/../perf.h:17, from util/header.c:9: util/../../../arch/powerpc/include/asm/unistd.h:360:27: error: linux/linkage.h: No such file or directory make: *** [util/header.o] Error 1 The reason is that we still have a #define __KERNEL__ in effect at the point where gets included, which means we get extra stuff that we don't need or want. This fixes the problem by undefining __KERNEL__ once we have included the file for which we need __KERNEL__ defined. Signed-off-by: Paul Mackerras Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra LKML-Reference: <19211.24287.453183.78836@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index ace57c3..8d63116 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -7,6 +7,8 @@ #define CONFIG_GENERIC_FIND_FIRST_BIT #include "../../../../include/linux/bitops.h" +#undef __KERNEL__ + static inline void set_bit(int nr, unsigned long *addr) { addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); -- cgit v0.10.2 From 7707b6b6f8d9188b612f9fc88c65411264b1ed57 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Nov 2009 13:25:48 +0800 Subject: perf kmem: Add new option to show raw ip Add option "--raw-ip" to show raw ip instead of symbols: # ./perf kmem --stat caller --raw-ip ------------------------------------------------------------------------------ Callsite |Total_alloc/Per | Total_req/Per | Hit | Frag ------------------------------------------------------------------------------ 0xc05301aa | 733184/4096 | 733184/4096 | 179| 0.000% 0xc0542ba0 | 483328/4096 | 483328/4096 | 118| 0.000% ... Also show symbols with format sym+offset instead of sym/offset. Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: linux-mm@kvack.org LKML-Reference: <4B0B6E5C.4080900@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 256d18f..1ef43c2 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -32,15 +32,14 @@ sort_fn_t caller_sort_fn; static int alloc_lines = -1; static int caller_lines = -1; +static bool raw_ip; + static char *cwd; static int cwdlen; struct alloc_stat { union { - struct { - char *name; - u64 call_site; - }; + u64 call_site; u64 ptr; }; u64 bytes_req; @@ -323,12 +322,14 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) if (is_caller) { addr = data->call_site; - sym = kernel_maps__find_symbol(addr, NULL, NULL); + if (!raw_ip) + sym = kernel_maps__find_symbol(addr, + NULL, NULL); } else addr = data->ptr; if (sym != NULL) - snprintf(bf, sizeof(bf), "%s/%Lx", sym->name, + snprintf(bf, sizeof(bf), "%s+%Lx", sym->name, addr - sym->start); else snprintf(bf, sizeof(bf), "%#Lx", addr); @@ -345,9 +346,9 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) } if (n_lines == -1) - printf(" ... | ... | ... | ... | ... \n"); + printf(" ... | ... | ... | ... | ... \n"); - printf(" ------------------------------------------------------------------------------\n"); + printf("%.78s\n", graph_dotted_line); } static void print_summary(void) @@ -558,6 +559,7 @@ static const struct option kmem_options[] = { OPT_CALLBACK('l', "line", NULL, "num", "show n lins", parse_line_opt), + OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), OPT_END() }; -- cgit v0.10.2 From 29b3e15289eb66788a0bf5ea4903f9fbeb1ec751 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Nov 2009 13:26:10 +0800 Subject: perf kmem: Default to sort by fragmentation Make the output sort by fragmentation by default. Also make the usage of "--sort" option consistent with other perf tools. That is, we support multi keys: "--sort key1[,key2]...". # ./perf kmem --stat caller ------------------------------------------------------------------------------ Callsite |Total_alloc/Per | Total_req/Per | Hit | Frag ------------------------------------------------------------------------------ __netdev_alloc_skb+23 | 5048/1682 | 4564/1521 | 3| 9.588% perf_event_alloc.clone.0+0 | 7504/682 | 7128/648 | 11| 5.011% tracepoint_add_probe+32e | 157/31 | 154/30 | 5| 1.911% alloc_buffer_head+16 | 456/57 | 448/56 | 8| 1.754% radix_tree_preload+51 | 584/292 | 576/288 | 2| 1.370% ... TODO: - Extract duplicate code in builtin-kmem.c and builtin-sched.c into util/sort.c. Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: linux-mm@kvack.org LKML-Reference: <4B0B6E72.7010200@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 1ef43c2..dc86f1e 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,14 +26,13 @@ static u64 sample_type; static int alloc_flag; static int caller_flag; -sort_fn_t alloc_sort_fn; -sort_fn_t caller_sort_fn; - static int alloc_lines = -1; static int caller_lines = -1; static bool raw_ip; +static char default_sort_order[] = "frag,hit,bytes"; + static char *cwd; static int cwdlen; @@ -371,20 +370,34 @@ static void print_result(void) print_summary(); } +struct sort_dimension { + const char name[20]; + sort_fn_t cmp; + struct list_head list; +}; + +static LIST_HEAD(caller_sort); +static LIST_HEAD(alloc_sort); + static void sort_insert(struct rb_root *root, struct alloc_stat *data, - sort_fn_t sort_fn) + struct list_head *sort_list) { struct rb_node **new = &(root->rb_node); struct rb_node *parent = NULL; + struct sort_dimension *sort; while (*new) { struct alloc_stat *this; - int cmp; + int cmp = 0; this = rb_entry(*new, struct alloc_stat, node); parent = *new; - cmp = sort_fn(data, this); + list_for_each_entry(sort, sort_list, list) { + cmp = sort->cmp(data, this); + if (cmp) + break; + } if (cmp > 0) new = &((*new)->rb_left); @@ -397,7 +410,7 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data, } static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, - sort_fn_t sort_fn) + struct list_head *sort_list) { struct rb_node *node; struct alloc_stat *data; @@ -409,14 +422,14 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, rb_erase(node, root); data = rb_entry(node, struct alloc_stat, node); - sort_insert(root_sorted, data, sort_fn); + sort_insert(root_sorted, data, sort_list); } } static void sort_result(void) { - __sort_result(&root_alloc_stat, &root_alloc_sorted, alloc_sort_fn); - __sort_result(&root_caller_stat, &root_caller_sorted, caller_sort_fn); + __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); + __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); } static int __cmd_kmem(void) @@ -434,7 +447,6 @@ static const char * const kmem_usage[] = { NULL }; - static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->ptr < r->ptr) @@ -444,6 +456,11 @@ static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension ptr_sort_dimension = { + .name = "ptr", + .cmp = ptr_cmp, +}; + static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->call_site < r->call_site) @@ -453,6 +470,11 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension callsite_sort_dimension = { + .name = "callsite", + .cmp = callsite_cmp, +}; + static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->hit < r->hit) @@ -462,6 +484,11 @@ static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension hit_sort_dimension = { + .name = "hit", + .cmp = hit_cmp, +}; + static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) { if (l->bytes_alloc < r->bytes_alloc) @@ -471,6 +498,11 @@ static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension bytes_sort_dimension = { + .name = "bytes", + .cmp = bytes_cmp, +}; + static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) { double x, y; @@ -485,31 +517,73 @@ static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) return 0; } +static struct sort_dimension frag_sort_dimension = { + .name = "frag", + .cmp = frag_cmp, +}; + +static struct sort_dimension *avail_sorts[] = { + &ptr_sort_dimension, + &callsite_sort_dimension, + &hit_sort_dimension, + &bytes_sort_dimension, + &frag_sort_dimension, +}; + +#define NUM_AVAIL_SORTS \ + (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *)) + +static int sort_dimension__add(const char *tok, struct list_head *list) +{ + struct sort_dimension *sort; + int i; + + for (i = 0; i < NUM_AVAIL_SORTS; i++) { + if (!strcmp(avail_sorts[i]->name, tok)) { + sort = malloc(sizeof(*sort)); + if (!sort) + die("malloc"); + memcpy(sort, avail_sorts[i], sizeof(*sort)); + list_add_tail(&sort->list, list); + return 0; + } + } + + return -1; +} + +static int setup_sorting(struct list_head *sort_list, const char *arg) +{ + char *tok; + char *str = strdup(arg); + + if (!str) + die("strdup"); + + while (true) { + tok = strsep(&str, ","); + if (!tok) + break; + if (sort_dimension__add(tok, sort_list) < 0) { + error("Unknown --sort key: '%s'", tok); + return -1; + } + } + + free(str); + return 0; +} + static int parse_sort_opt(const struct option *opt __used, const char *arg, int unset __used) { - sort_fn_t sort_fn; - if (!arg) return -1; - if (strcmp(arg, "ptr") == 0) - sort_fn = ptr_cmp; - else if (strcmp(arg, "call_site") == 0) - sort_fn = callsite_cmp; - else if (strcmp(arg, "hit") == 0) - sort_fn = hit_cmp; - else if (strcmp(arg, "bytes") == 0) - sort_fn = bytes_cmp; - else if (strcmp(arg, "frag") == 0) - sort_fn = frag_cmp; - else - return -1; - if (caller_flag > alloc_flag) - caller_sort_fn = sort_fn; + return setup_sorting(&caller_sort, arg); else - alloc_sort_fn = sort_fn; + return setup_sorting(&alloc_sort, arg); return 0; } @@ -553,8 +627,8 @@ static const struct option kmem_options[] = { OPT_CALLBACK(0, "stat", NULL, "|", "stat selector, Pass 'alloc' or 'caller'.", parse_stat_opt), - OPT_CALLBACK('s', "sort", NULL, "key", - "sort by key: ptr, call_site, hit, bytes, frag", + OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", + "sort by key(s): ptr, call_site, bytes, hit, frag", parse_sort_opt), OPT_CALLBACK('l', "line", NULL, "num", "show n lins", @@ -606,10 +680,10 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used) else if (argc) usage_with_options(kmem_usage, kmem_options); - if (!alloc_sort_fn) - alloc_sort_fn = bytes_cmp; - if (!caller_sort_fn) - caller_sort_fn = bytes_cmp; + if (list_empty(&caller_sort)) + setup_sorting(&caller_sort, default_sort_order); + if (list_empty(&alloc_sort)) + setup_sorting(&alloc_sort, default_sort_order); return __cmd_kmem(); } -- cgit v0.10.2 From 7d0d39459dab20bf60cac30a1a7d50b286c60cc1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Nov 2009 13:26:31 +0800 Subject: perf kmem: Collect cross node allocation statistics Show cross node memory allocations: # ./perf kmem SUMMARY ======= ... Cross node allocations: 0/3633 Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: linux-mm@kvack.org LKML-Reference: <4B0B6E87.10906@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index dc86f1e..1ecf3f4 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -36,6 +36,9 @@ static char default_sort_order[] = "frag,hit,bytes"; static char *cwd; static int cwdlen; +static int *cpunode_map; +static int max_cpu_num; + struct alloc_stat { union { u64 call_site; @@ -54,12 +57,74 @@ static struct rb_root root_caller_stat; static struct rb_root root_caller_sorted; static unsigned long total_requested, total_allocated; +static unsigned long nr_allocs, nr_cross_allocs; struct raw_event_sample { u32 size; char data[0]; }; +#define PATH_SYS_NODE "/sys/devices/system/node" + +static void init_cpunode_map(void) +{ + FILE *fp; + int i; + + fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); + if (!fp) { + max_cpu_num = 4096; + return; + } + + if (fscanf(fp, "%d", &max_cpu_num) < 1) + die("Failed to read 'kernel_max' from sysfs"); + max_cpu_num++; + + cpunode_map = calloc(max_cpu_num, sizeof(int)); + if (!cpunode_map) + die("calloc"); + for (i = 0; i < max_cpu_num; i++) + cpunode_map[i] = -1; + fclose(fp); +} + +static void setup_cpunode_map(void) +{ + struct dirent *dent1, *dent2; + DIR *dir1, *dir2; + unsigned int cpu, mem; + char buf[PATH_MAX]; + + init_cpunode_map(); + + dir1 = opendir(PATH_SYS_NODE); + if (!dir1) + return; + + while (true) { + dent1 = readdir(dir1); + if (!dent1) + break; + + if (sscanf(dent1->d_name, "node%u", &mem) < 1) + continue; + + snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); + dir2 = opendir(buf); + if (!dir2) + continue; + while (true) { + dent2 = readdir(dir2); + if (!dent2) + break; + if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1) + continue; + cpunode_map[cpu] = mem; + } + } +} + static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { @@ -157,15 +222,16 @@ static void insert_caller_stat(unsigned long call_site, static void process_alloc_event(struct raw_event_sample *raw, struct event *event, - int cpu __used, + int cpu, u64 timestamp __used, struct thread *thread __used, - int node __used) + int node) { unsigned long call_site; unsigned long ptr; int bytes_req; int bytes_alloc; + int node1, node2; ptr = raw_field_value(event, "ptr", raw->data); call_site = raw_field_value(event, "call_site", raw->data); @@ -177,6 +243,14 @@ static void process_alloc_event(struct raw_event_sample *raw, total_requested += bytes_req; total_allocated += bytes_alloc; + + if (node) { + node1 = cpunode_map[cpu]; + node2 = raw_field_value(event, "node", raw->data); + if (node1 != node2) + nr_cross_allocs++; + } + nr_allocs++; } static void process_free_event(struct raw_event_sample *raw __used, @@ -359,6 +433,7 @@ static void print_summary(void) total_allocated - total_requested); printf("Internal fragmentation: %f%%\n", fragmentation(total_requested, total_allocated)); + printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); } static void print_result(void) @@ -685,6 +760,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __used) if (list_empty(&alloc_sort)) setup_sorting(&alloc_sort, default_sort_order); + setup_cpunode_map(); + return __cmd_kmem(); } -- cgit v0.10.2 From 079d3f653134e2f2ac99dae28b08c0cc64268103 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Nov 2009 13:26:55 +0800 Subject: perf kmem: Measure kmalloc/kfree CPU ping-pong call-sites Show statistics for allocations and frees on different cpus: ------------------------------------------------------------------------------------------------------ Callsite | Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag ------------------------------------------------------------------------------------------------------ perf_event_alloc.clone.0+0 | 7504/682 | 7128/648 | 11 | 0 | 5.011% alloc_buffer_head+16 | 288/57 | 280/56 | 5 | 0 | 2.778% radix_tree_preload+51 | 296/296 | 288/288 | 1 | 0 | 2.703% tracepoint_add_probe+32e | 157/31 | 154/30 | 5 | 0 | 1.911% do_maps_open+0 | 796/12 | 792/12 | 66 | 0 | 0.503% sock_alloc_send_pskb+16e | 23780/495 | 23744/494 | 48 | 38 | 0.151% anon_vma_prepare+9a | 3744/44 | 3740/44 | 85 | 0 | 0.107% d_alloc+21 | 64948/164 | 64944/164 | 396 | 0 | 0.006% proc_alloc_inode+23 | 262292/676 | 262288/676 | 388 | 0 | 0.002% create_object+28 | 459600/200 | 459600/200 | 2298 | 71 | 0.000% journal_start+67 | 14440/40 | 14440/40 | 361 | 0 | 0.000% get_empty_filp+df | 53504/256 | 53504/256 | 209 | 0 | 0.000% getname+2a | 823296/4096 | 823296/4096 | 201 | 0 | 0.000% seq_read+2b0 | 544768/4096 | 544768/4096 | 133 | 0 | 0.000% seq_open+6d | 17024/128 | 17024/128 | 133 | 0 | 0.000% mmap_region+2e6 | 11704/88 | 11704/88 | 133 | 0 | 0.000% single_open+0 | 1072/16 | 1072/16 | 67 | 0 | 0.000% __alloc_skb+2e | 12544/256 | 12544/256 | 49 | 38 | 0.000% __sigqueue_alloc+4a | 1296/144 | 1296/144 | 9 | 8 | 0.000% tracepoint_add_probe+6f | 80/16 | 80/16 | 5 | 0 | 0.000% ------------------------------------------------------------------------------------------------------ ... Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: linux-mm@kvack.org LKML-Reference: <4B0B6E9F.6020309@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 1ecf3f4..173d6db 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -40,13 +40,14 @@ static int *cpunode_map; static int max_cpu_num; struct alloc_stat { - union { - u64 call_site; - u64 ptr; - }; + u64 call_site; + u64 ptr; u64 bytes_req; u64 bytes_alloc; u32 hit; + u32 pingpong; + + short alloc_cpu; struct rb_node node; }; @@ -144,16 +145,13 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static void insert_alloc_stat(unsigned long ptr, - int bytes_req, int bytes_alloc) +static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, + int bytes_req, int bytes_alloc, int cpu) { struct rb_node **node = &root_alloc_stat.rb_node; struct rb_node *parent = NULL; struct alloc_stat *data = NULL; - if (!alloc_flag) - return; - while (*node) { parent = *node; data = rb_entry(*node, struct alloc_stat, node); @@ -172,7 +170,10 @@ static void insert_alloc_stat(unsigned long ptr, data->bytes_alloc += bytes_req; } else { data = malloc(sizeof(*data)); + if (!data) + die("malloc"); data->ptr = ptr; + data->pingpong = 0; data->hit = 1; data->bytes_req = bytes_req; data->bytes_alloc = bytes_alloc; @@ -180,6 +181,8 @@ static void insert_alloc_stat(unsigned long ptr, rb_link_node(&data->node, parent, node); rb_insert_color(&data->node, &root_alloc_stat); } + data->call_site = call_site; + data->alloc_cpu = cpu; } static void insert_caller_stat(unsigned long call_site, @@ -189,9 +192,6 @@ static void insert_caller_stat(unsigned long call_site, struct rb_node *parent = NULL; struct alloc_stat *data = NULL; - if (!caller_flag) - return; - while (*node) { parent = *node; data = rb_entry(*node, struct alloc_stat, node); @@ -210,7 +210,10 @@ static void insert_caller_stat(unsigned long call_site, data->bytes_alloc += bytes_req; } else { data = malloc(sizeof(*data)); + if (!data) + die("malloc"); data->call_site = call_site; + data->pingpong = 0; data->hit = 1; data->bytes_req = bytes_req; data->bytes_alloc = bytes_alloc; @@ -238,7 +241,7 @@ static void process_alloc_event(struct raw_event_sample *raw, bytes_req = raw_field_value(event, "bytes_req", raw->data); bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); - insert_alloc_stat(ptr, bytes_req, bytes_alloc); + insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); insert_caller_stat(call_site, bytes_req, bytes_alloc); total_requested += bytes_req; @@ -253,12 +256,58 @@ static void process_alloc_event(struct raw_event_sample *raw, nr_allocs++; } -static void process_free_event(struct raw_event_sample *raw __used, - struct event *event __used, - int cpu __used, +static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); +static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); + +static struct alloc_stat *search_alloc_stat(unsigned long ptr, + unsigned long call_site, + struct rb_root *root, + sort_fn_t sort_fn) +{ + struct rb_node *node = root->rb_node; + struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; + + while (node) { + struct alloc_stat *data; + int cmp; + + data = rb_entry(node, struct alloc_stat, node); + + cmp = sort_fn(&key, data); + if (cmp < 0) + node = node->rb_left; + else if (cmp > 0) + node = node->rb_right; + else + return data; + } + return NULL; +} + +static void process_free_event(struct raw_event_sample *raw, + struct event *event, + int cpu, u64 timestamp __used, struct thread *thread __used) { + unsigned long ptr; + struct alloc_stat *s_alloc, *s_caller; + + ptr = raw_field_value(event, "ptr", raw->data); + + s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); + if (!s_alloc) + return; + + if (cpu != s_alloc->alloc_cpu) { + s_alloc->pingpong++; + + s_caller = search_alloc_stat(0, s_alloc->call_site, + &root_caller_stat, callsite_cmp); + assert(s_caller); + s_caller->pingpong++; + } + s_alloc->alloc_cpu = -1; } static void @@ -379,10 +428,10 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) { struct rb_node *next; - printf("%.78s\n", graph_dotted_line); - printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr"); - printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n"); - printf("%.78s\n", graph_dotted_line); + printf("%.102s\n", graph_dotted_line); + printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); + printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); + printf("%.102s\n", graph_dotted_line); next = rb_first(root); @@ -390,7 +439,7 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) struct alloc_stat *data = rb_entry(next, struct alloc_stat, node); struct symbol *sym = NULL; - char bf[BUFSIZ]; + char buf[BUFSIZ]; u64 addr; if (is_caller) { @@ -402,26 +451,28 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) addr = data->ptr; if (sym != NULL) - snprintf(bf, sizeof(bf), "%s+%Lx", sym->name, + snprintf(buf, sizeof(buf), "%s+%Lx", sym->name, addr - sym->start); else - snprintf(bf, sizeof(bf), "%#Lx", addr); + snprintf(buf, sizeof(buf), "%#Lx", addr); + printf(" %-34s |", buf); - printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%%\n", - bf, (unsigned long long)data->bytes_alloc, + printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n", + (unsigned long long)data->bytes_alloc, (unsigned long)data->bytes_alloc / data->hit, (unsigned long long)data->bytes_req, (unsigned long)data->bytes_req / data->hit, (unsigned long)data->hit, + (unsigned long)data->pingpong, fragmentation(data->bytes_req, data->bytes_alloc)); next = rb_next(next); } if (n_lines == -1) - printf(" ... | ... | ... | ... | ... \n"); + printf(" ... | ... | ... | ... | ... | ... \n"); - printf("%.78s\n", graph_dotted_line); + printf("%.102s\n", graph_dotted_line); } static void print_summary(void) @@ -597,12 +648,27 @@ static struct sort_dimension frag_sort_dimension = { .cmp = frag_cmp, }; +static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) +{ + if (l->pingpong < r->pingpong) + return -1; + else if (l->pingpong > r->pingpong) + return 1; + return 0; +} + +static struct sort_dimension pingpong_sort_dimension = { + .name = "pingpong", + .cmp = pingpong_cmp, +}; + static struct sort_dimension *avail_sorts[] = { &ptr_sort_dimension, &callsite_sort_dimension, &hit_sort_dimension, &bytes_sort_dimension, &frag_sort_dimension, + &pingpong_sort_dimension, }; #define NUM_AVAIL_SORTS \ @@ -703,7 +769,7 @@ static const struct option kmem_options[] = { "stat selector, Pass 'alloc' or 'caller'.", parse_stat_opt), OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", - "sort by key(s): ptr, call_site, bytes, hit, frag", + "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", parse_sort_opt), OPT_CALLBACK('l', "line", NULL, "num", "show n lins", -- cgit v0.10.2 From b23d5767a5818caec8547d0bce1588b02bdecd30 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 24 Nov 2009 13:27:11 +0800 Subject: perf kmem: Add help file Add Documentation/perf-kmem.txt Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Eduard - Gabriel Munteanu Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: linux-mm@kvack.org LKML-Reference: <4B0B6EAF.80802@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt new file mode 100644 index 0000000..44b0ce3 --- /dev/null +++ b/tools/perf/Documentation/perf-kmem.txt @@ -0,0 +1,44 @@ +perf-kmem(1) +============== + +NAME +---- +perf-kmem - Tool to trace/measure kernel memory(slab) properties + +SYNOPSIS +-------- +[verse] +'perf kmem' {record} [] + +DESCRIPTION +----------- +There's two variants of perf kmem: + + 'perf kmem record ' to record the kmem events + of an arbitrary workload. + + 'perf kmem' to report kernel memory statistics. + +OPTIONS +------- +-i :: +--input=:: + Select the input file (default: perf.data) + +--stat=:: + Select per callsite or per allocation statistics + +-s :: +--sort=:: + Sort the output (default: frag,hit,bytes) + +-l :: +--line=:: + Print n lines only + +--raw-ip:: + Print raw ip instead of symbol + +SEE ALSO +-------- +linkperf:perf-record[1] diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index d3a6e18..02b09ea 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -14,3 +14,4 @@ perf-timechart mainporcelain common perf-top mainporcelain common perf-trace mainporcelain common perf-probe mainporcelain common +perf-kmem mainporcelain common -- cgit v0.10.2 From 184d3da8ef0ca552dffa0fdd35c046e058a2cf9a Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 23 Nov 2009 21:40:49 -0800 Subject: perf_events: Fix bogus copy_to_user() in perf_event_read_group() When using an event group, the value and id for non leaders events were wrong due to invalid offset into the outgoing buffer. Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra Cc: paulus@samba.org Cc: perfmon2-devel@lists.sourceforge.net LKML-Reference: <4b0b71e1.0508d00a.075e.ffff84a3@mx.google.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9425c96..accfd7bf 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1831,7 +1831,7 @@ static int perf_event_read_group(struct perf_event *event, size = n * sizeof(u64); - if (copy_to_user(buf + size, values, size)) { + if (copy_to_user(buf + ret, values, size)) { ret = -EFAULT; goto unlock; } -- cgit v0.10.2 From 36ace27e3e60d44ea69ce394b2e45386ae98d9d9 Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Tue, 24 Nov 2009 11:55:45 +0100 Subject: sched: Optimize branch hint in pick_next_task_fair() Branch hint profiling on my nehalem machine showed 90% incorrect branch hints: 15728471 158903754 90 pick_next_task_fair sched_fair.c 1555 Signed-off-by: Tim Blechmann Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4B0BBBB1.2050100@klingt.org> Signed-off-by: Ingo Molnar diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index f28a267..24086e7 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1704,7 +1704,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; - if (unlikely(!cfs_rq->nr_running)) + if (!cfs_rq->nr_running) return NULL; do { -- cgit v0.10.2 From 710390d90f143a9ebb87a475215140f426792efd Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Tue, 24 Nov 2009 11:55:27 +0100 Subject: sched: Optimize branch hint in context_switch() Branch hint profiling on my nehalem machine showed over 90% incorrect branch hints: 10420275 170645395 94 context_switch sched.c 3043 10408421 171098521 94 context_switch sched.c 3050 Signed-off-by: Tim Blechmann Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4B0BBB9F.6080304@klingt.org> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 93474a7..010d5e1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2829,14 +2829,14 @@ context_switch(struct rq *rq, struct task_struct *prev, */ arch_start_context_switch(prev); - if (unlikely(!mm)) { + if (likely(!mm)) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else switch_mm(oldmm, mm, next); - if (unlikely(!prev->mm)) { + if (likely(!prev->mm)) { prev->active_mm = NULL; rq->prev_mm = oldmm; } -- cgit v0.10.2 From a3a1de0c34de6f5f8332cd6151c46af7813c0fcb Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Tue, 24 Nov 2009 11:55:15 +0100 Subject: sched, x86: Optimize branch hint in __switch_to() Branch hint profiling on my nehalem machine showed 96% incorrect branch hints: 6548732 174664120 96 __switch_to process_64.c 406 6548745 174565593 96 __switch_to process_64.c 410 Signed-off-by: Tim Blechmann Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4B0BBB93.3080307@klingt.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ad535b6..d9db104 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -406,11 +406,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * This won't pick up thread selector changes, but I guess that is ok. */ savesegment(es, prev->es); - if (unlikely(next->es | prev->es)) + if (next->es | prev->es) loadsegment(es, next->es); - savesegment(ds, prev->ds); - if (unlikely(next->ds | prev->ds)) + if (next->ds | prev->ds) loadsegment(ds, next->ds); -- cgit v0.10.2 From c9286b7e293a1ea054e857ff3f5a23d0ad8d4f36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 19:50:38 +0100 Subject: locking: Remove unused prototype commit 910067d1(remove generic__raw_read_trylock()) removed the implementation but left the prototype around. Remove it. Signed-off-by: Thomas Gleixner diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index f0ca7a7..faf1482 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -79,8 +79,6 @@ */ #include -extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock); - /* * Pull the __raw*() functions/declarations (UP-nondebug doesnt need them): */ -- cgit v0.10.2 From a49ed0bf427a8328a3296eebedc7697fe5098dbf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 19:57:50 +0100 Subject: locking: Use __[SPIN|RW]_LOCK_UNLOCKED in [spin|rw]_lock_init() SPIN_LOCK_UNLOCKED and RW_LOCK_UNLOCKED are deprecated. Replace them with the __*_LOCK_UNLOCKED variants. Signed-off-by: Thomas Gleixner diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index faf1482..71dccfe 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -100,7 +100,7 @@ do { \ #else # define spin_lock_init(lock) \ - do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) + do { *(lock) = __SPIN_LOCK_UNLOCKED(lock); } while (0) #endif #ifdef CONFIG_DEBUG_SPINLOCK @@ -114,7 +114,7 @@ do { \ } while (0) #else # define rwlock_init(lock) \ - do { *(lock) = RW_LOCK_UNLOCKED; } while (0) + do { *(lock) = __RW_LOCK_UNLOCKED(lock); } while (0) #endif #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) -- cgit v0.10.2 From c9c7ccaf3a2686ed3a44d69bb1f8b55eeead8a4e Mon Sep 17 00:00:00 2001 From: John Kacur Date: Tue, 24 Nov 2009 15:35:00 +0100 Subject: perf tools: Add perf.data to .gitignore Signed-off-by: John Kacur Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: acme@redhat.com LKML-Reference: <1259073301-11506-2-git-send-email-jkacur@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 0854f11..fe08660 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -12,6 +12,7 @@ perf*.1 perf*.xml perf*.html common-cmds.h +perf.data tags TAGS cscope* -- cgit v0.10.2 From e74328d3a17ed75ffdf72b86f289965823a47240 Mon Sep 17 00:00:00 2001 From: John Kacur Date: Tue, 24 Nov 2009 15:35:01 +0100 Subject: perf tools: Use common process_event functions for annotate and report Prevent bit-rot in perf-annotate by using common functions where possible. Here we create process_events.[ch] to hold the common functions. Signed-off-by: John Kacur Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: acme@redhat.com LKML-Reference: <1259073301-11506-3-git-send-email-jkacur@redhat.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f1537a9..de37d49 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -369,6 +369,7 @@ LIB_H += util/sort.h LIB_H += util/hist.h LIB_H += util/thread.h LIB_H += util/data_map.h +LIB_H += util/process_events.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -411,6 +412,7 @@ LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o LIB_OBJS += util/data_map.o +LIB_OBJS += util/process_events.o BUILTIN_OBJS += builtin-annotate.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6b13a1e..59b6123 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -24,6 +24,7 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" +#include "util/process_events.h" static char const *input_name = "perf.data"; @@ -202,32 +203,6 @@ got_map: } static int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct map *map = map__new(&event->mmap, NULL, 0); - struct thread *thread = threads__findnew(event->mmap.pid); - - dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); - return 0; - } - - thread__insert_map(thread, map); - total_mmap++; - - return 0; -} - -static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { struct thread *thread = threads__findnew(event->comm.pid); @@ -248,33 +223,6 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) } static int -process_fork_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->fork.pid); - struct thread *parent = threads__findnew(event->fork.ppid); - - dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->fork.pid, event->fork.ppid); - - /* - * A thread clone will have the same PID for both - * parent and child. - */ - if (thread == parent) - return 0; - - if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); - return -1; - } - total_fork++; - - return 0; -} - -static int process_event(event_t *event, unsigned long offset, unsigned long head) { switch (event->header.type) { @@ -288,7 +236,7 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return process_comm_event(event, offset, head); case PERF_RECORD_FORK: - return process_fork_event(event, offset, head); + return process_task_event(event, offset, head); /* * We dont process them right now but they are fine: */ diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fe474b7..1826be7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -30,6 +30,7 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" +#include "util/process_events.h" static char const *input_name = "perf.data"; @@ -54,9 +55,6 @@ static int exclude_other = 1; static char callchain_default_opt[] = "fractal,0.5"; const char *vmlinux_name; -static char *cwd; -static int cwdlen; - static struct perf_header *header; static u64 sample_type; @@ -751,33 +749,6 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) } static int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct map *map = map__new(&event->mmap, cwd, cwdlen); - struct thread *thread = threads__findnew(event->mmap.pid); - - dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - event->mmap.tid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); - return 0; - } - - thread__insert_map(thread, map); - total_mmap++; - - return 0; -} - -static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { struct thread *thread = threads__findnew(event->comm.pid); @@ -798,38 +769,6 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) } static int -process_task_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->fork.pid); - struct thread *parent = threads__findnew(event->fork.ppid); - - dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type == PERF_RECORD_FORK ? "FORK" : "EXIT", - event->fork.pid, event->fork.tid, - event->fork.ppid, event->fork.ptid); - - /* - * A thread clone will have the same PID for both - * parent and child. - */ - if (thread == parent) - return 0; - - if (event->header.type == PERF_RECORD_EXIT) - return 0; - - if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); - return -1; - } - total_fork++; - - return 0; -} - -static int process_lost_event(event_t *event, unsigned long offset, unsigned long head) { dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n", diff --git a/tools/perf/util/process_event.c b/tools/perf/util/process_event.c new file mode 100644 index 0000000..a970789 --- /dev/null +++ b/tools/perf/util/process_event.c @@ -0,0 +1,53 @@ +#include "process_event.h" + +char *cwd; +int cwdlen; + +int +process_mmap_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct map *map = map__new(&event->mmap, cwd, cwdlen); + struct thread *thread = threads__findnew(event->mmap.pid); + + dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->mmap.pid, + event->mmap.tid, + (void *)(long)event->mmap.start, + (void *)(long)event->mmap.len, + (void *)(long)event->mmap.pgoff, + event->mmap.filename); + + if (thread == NULL || map == NULL) { + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); + return 0; + } + + thread__insert_map(thread, map); + total_mmap++; + + return 0; + +} + +int +process_comm_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct thread *thread = threads__findnew(event->comm.pid); + + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->comm.comm, event->comm.pid); + + if (thread == NULL || + thread__set_comm_adjust(thread, event->comm.comm)) { + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); + return -1; + } + total_comm++; + + return 0; +} + diff --git a/tools/perf/util/process_event.h b/tools/perf/util/process_event.h new file mode 100644 index 0000000..6f68c69 --- /dev/null +++ b/tools/perf/util/process_event.h @@ -0,0 +1,29 @@ +#ifndef __PROCESS_EVENT_H +#define __PROCESS_EVENT_H + +#include "../builtin.h" +#include "util.h" + +#include "color.h" +#include +#include "cache.h" +#include +#include "symbol.h" +#include "string.h" + +#include "../perf.h" +#include "debug.h" + +#include "parse-options.h" +#include "parse-events.h" + +#include "thread.h" +#include "sort.h" +#include "hist.h" + +extern char *cwd; +extern int cwdlen; +extern int process_mmap_event(event_t *, unsigned long, unsigned long); +extern int process_comm_event(event_t *, unsigned long , unsigned long); + +#endif /* __PROCESS_H */ diff --git a/tools/perf/util/process_events.c b/tools/perf/util/process_events.c new file mode 100644 index 0000000..a920436 --- /dev/null +++ b/tools/perf/util/process_events.c @@ -0,0 +1,64 @@ +#include "process_events.h" + +char *cwd; +int cwdlen; + +int +process_mmap_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct map *map = map__new(&event->mmap, cwd, cwdlen); + struct thread *thread = threads__findnew(event->mmap.pid); + + dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->mmap.pid, + event->mmap.tid, + (void *)(long)event->mmap.start, + (void *)(long)event->mmap.len, + (void *)(long)event->mmap.pgoff, + event->mmap.filename); + + if (thread == NULL || map == NULL) { + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); + return 0; + } + + thread__insert_map(thread, map); + total_mmap++; + + return 0; +} + +int +process_task_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct thread *thread = threads__findnew(event->fork.pid); + struct thread *parent = threads__findnew(event->fork.ppid); + + dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.type == PERF_RECORD_FORK ? "FORK" : "EXIT", + event->fork.pid, event->fork.tid, + event->fork.ppid, event->fork.ptid); + + /* + * A thread clone will have the same PID for both + * parent and child. + */ + if (thread == parent) + return 0; + + if (event->header.type == PERF_RECORD_EXIT) + return 0; + + if (!thread || !parent || thread__fork(thread, parent)) { + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); + return -1; + } + total_fork++; + + return 0; +} + diff --git a/tools/perf/util/process_events.h b/tools/perf/util/process_events.h new file mode 100644 index 0000000..73d092f --- /dev/null +++ b/tools/perf/util/process_events.h @@ -0,0 +1,35 @@ +#ifndef __PROCESS_EVENTS_H +#define __PROCESS_EVENTS_H + +#include "../builtin.h" + +#include "util.h" +#include "color.h" +#include +#include "cache.h" +#include +#include "symbol.h" +#include "string.h" +#include "callchain.h" +#include "strlist.h" +#include "values.h" + +#include "../perf.h" +#include "debug.h" +#include "header.h" + +#include "parse-options.h" +#include "parse-events.h" + +#include "data_map.h" +#include "thread.h" +#include "sort.h" +#include "hist.h" + +extern char *cwd; +extern int cwdlen; + +extern int process_mmap_event(event_t *, unsigned long , unsigned long); +extern int process_task_event(event_t *, unsigned long, unsigned long); + +#endif /* __PROCESS_EVENTS_H */ -- cgit v0.10.2 From 7cc017edb9459193d3b581155a14029e4bef0c49 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Nov 2009 12:05:14 -0200 Subject: perf top: Always show the DSO column, even if its all the same MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ingo found it confusing, and I agree with that, for 'perf report' its OK because it is static, but for a tool refreshing it the eventual switch from column to summary at the top may seem confusing. Suggested-by: Ingo Molnar Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259071517-3242-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 6a5de90..b9a321f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -451,9 +451,8 @@ static void print_sym_table(void) struct sym_entry *syme, *n; struct rb_root tmp = RB_ROOT; struct rb_node *nd; - int sym_width = 0, dso_width = 0; + int sym_width = 0, dso_width = 0, max_dso_width; const int win_width = winsize.ws_col - 1; - struct dso *unique_dso = NULL, *first_dso = NULL; samples = userspace_samples = 0; @@ -539,11 +538,6 @@ static void print_sym_table(void) (int)syme->snap_count < count_filter) continue; - if (first_dso == NULL) - unique_dso = first_dso = syme->map->dso; - else if (syme->map->dso != first_dso) - unique_dso = NULL; - if (syme->map->dso->long_name_len > dso_width) dso_width = syme->map->dso->long_name_len; @@ -553,14 +547,10 @@ static void print_sym_table(void) printed = 0; - if (unique_dso) - printf("DSO: %s\n", unique_dso->long_name); - else { - int max_dso_width = winsize.ws_col - sym_width - 29; - if (dso_width > max_dso_width) - dso_width = max_dso_width; - putchar('\n'); - } + max_dso_width = winsize.ws_col - sym_width - 29; + if (dso_width > max_dso_width) + dso_width = max_dso_width; + putchar('\n'); if (nr_counters == 1) printf(" samples pcnt"); else @@ -568,17 +558,13 @@ static void print_sym_table(void) if (verbose) printf(" RIP "); - printf(" %-*.*s", sym_width, sym_width, "function"); - if (!unique_dso) - printf(" DSO"); - putchar('\n'); + printf(" %-*.*s DSO\n", sym_width, sym_width, "function"); printf(" %s _______ _____", nr_counters == 1 ? " " : "______"); if (verbose) printf(" ________________"); printf(" %-*.*s", sym_width, sym_width, graph_line); - if (!unique_dso) - printf(" %-*.*s", dso_width, dso_width, graph_line); + printf(" %-*.*s", dso_width, dso_width, graph_line); puts("\n"); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { @@ -603,12 +589,10 @@ static void print_sym_table(void) if (verbose) printf(" %016llx", sym->start); printf(" %-*.*s", sym_width, sym_width, sym->name); - if (!unique_dso) - printf(" %-*.*s", dso_width, dso_width, - dso_width >= syme->map->dso->long_name_len ? - syme->map->dso->long_name : - syme->map->dso->short_name); - printf("\n"); + printf(" %-*.*s\n", dso_width, dso_width, + dso_width >= syme->map->dso->long_name_len ? + syme->map->dso->long_name : + syme->map->dso->short_name); } } -- cgit v0.10.2 From b32d133aec5dc882cf783a293f393bfb3f4379e1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Nov 2009 12:05:15 -0200 Subject: perf symbols: Simplify symbol machinery setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And also express its configuration toggles via a struct. Now all one has to do is to call symbol__init(NULL) if the defaults are OK, or pass a struct symbol_conf pointer with the desired configuration. If a tool uses kernel_maps__find_symbol() to look at the kernel and modules mappings for a symbol but didn't call symbol__init() first, that will generate a one time warning too, alerting the subcommand developer that symbol__init() must be called. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259071517-3242-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 59b6123..cd97c2b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -34,11 +34,9 @@ static int input; static int full_paths; static int print_line; -static bool use_modules; static unsigned long page_size; static unsigned long mmap_window = 32; -const char *vmlinux_name; struct sym_hist { u64 sum; @@ -56,6 +54,11 @@ struct sym_priv { struct sym_ext *ext; }; +static struct symbol_conf symbol_conf = { + .priv_size = sizeof(struct sym_priv), + .try_vmlinux_path = true, +}; + static const char *sym_hist_filter; static int symbol_filter(struct map *map __used, struct symbol *sym) @@ -586,11 +589,6 @@ static int __cmd_annotate(void) exit(0); } - if (kernel_maps__init(vmlinux_name, true, use_modules) < 0) { - pr_err("failed to create kernel maps for symbol resolution\b"); - return -1; - } - remap: buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, MAP_SHARED, input, offset); @@ -691,8 +689,9 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), - OPT_BOOLEAN('m', "modules", &use_modules, + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('l', "print-line", &print_line, "print matching source lines (may be slow)"), @@ -718,7 +717,8 @@ static void setup_sorting(void) int cmd_annotate(int argc, const char **argv, const char *prefix __used) { - symbol__init(sizeof(struct sym_priv)); + if (symbol__init(&symbol_conf) < 0) + return -1; page_size = getpagesize(); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 173d6db..330dbc7 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -412,7 +412,7 @@ static int read_events(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1826be7..0ee3d05 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -39,7 +39,6 @@ static char *dso_list_str, *comm_list_str, *sym_list_str, static struct strlist *dso_list, *comm_list, *sym_list; static int force; -static bool use_modules; static int full_paths; static int show_nr_samples; @@ -53,12 +52,13 @@ static char *pretty_printing_style = default_pretty_printing_style; static int exclude_other = 1; static char callchain_default_opt[] = "fractal,0.5"; -const char *vmlinux_name; static struct perf_header *header; static u64 sample_type; +struct symbol_conf symbol_conf; + static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) @@ -865,8 +865,7 @@ static int __cmd_report(void) register_perf_file_handler(&file_handler); - ret = mmap_dispatch_perf_file(&header, input_name, vmlinux_name, - !vmlinux_name, force, + ret = mmap_dispatch_perf_file(&header, input_name, force, full_paths, &cwdlen, &cwd); if (ret) return ret; @@ -963,9 +962,10 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), - OPT_BOOLEAN('m', "modules", &use_modules, + OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, "Show a column with the number of samples"), @@ -1035,7 +1035,8 @@ static void setup_list(struct strlist **list, const char *list_str, int cmd_report(int argc, const char **argv, const char *prefix __used) { - symbol__init(0); + if (symbol__init(&symbol_conf) < 0) + return -1; argc = parse_options(argc, argv, options, report_usage, 0); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 260f57a..dbf089b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1718,7 +1718,7 @@ static int read_events(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index b9a321f..a212475 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -79,7 +79,7 @@ static int dump_symtab = 0; static bool hide_kernel_symbols = false; static bool hide_user_symbols = false; static struct winsize winsize; -const char *vmlinux_name; +struct symbol_conf symbol_conf; /* * Source @@ -128,7 +128,7 @@ struct sym_entry { static inline struct symbol *sym_entry__symbol(struct sym_entry *self) { - return ((void *)self) + symbol__priv_size; + return ((void *)self) + symbol_conf.priv_size; } static void get_term_dimensions(struct winsize *ws) @@ -695,7 +695,7 @@ static void print_mapped_keys(void) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); - if (vmlinux_name) { + if (symbol_conf.vmlinux_name) { fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); @@ -732,7 +732,7 @@ static int key_mapped(int c) case 'F': case 's': case 'S': - return vmlinux_name ? 1 : 0; + return symbol_conf.vmlinux_name ? 1 : 0; default: break; } @@ -1261,7 +1261,8 @@ static const struct option options[] = { "system-wide collection from all CPUs"), OPT_INTEGER('C', "CPU", &profile_cpu, "CPU to profile on"), - OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), OPT_BOOLEAN('K', "hide_kernel_symbols", &hide_kernel_symbols, "hide kernel symbols"), OPT_INTEGER('m', "mmap-pages", &mmap_pages, @@ -1295,7 +1296,7 @@ static const struct option options[] = { int cmd_top(int argc, const char **argv, const char *prefix __used) { - int counter, err; + int counter; page_size = sysconf(_SC_PAGE_SIZE); @@ -1313,15 +1314,16 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (!nr_counters) nr_counters = 1; - symbol__init(sizeof(struct sym_entry) + - (nr_counters + 1) * sizeof(unsigned long)); + symbol_conf.priv_size = (sizeof(struct sym_entry) + + (nr_counters + 1) * sizeof(unsigned long)); + if (symbol_conf.vmlinux_name == NULL) + symbol_conf.try_vmlinux_path = true; + if (symbol__init(&symbol_conf) < 0) + return -1; if (delay_secs < 1) delay_secs = 1; - err = kernel_maps__init(vmlinux_name, !vmlinux_name, true); - if (err < 0) - return err; parse_source(sym_filter_entry); /* diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b71198e..75972fd 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -131,7 +131,7 @@ static int __cmd_trace(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, NULL, false, + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index f318d19..b238462 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -101,8 +101,6 @@ out: int mmap_dispatch_perf_file(struct perf_header **pheader, const char *input_name, - const char *vmlinux_name, - bool try_vmlinux_path, int force, int full_paths, int *cwdlen, @@ -172,12 +170,6 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, curr_handler->sample_type_check(sample_type) < 0) goto out_delete; - err = -ENOMEM; - if (kernel_maps__init(vmlinux_name, try_vmlinux_path, true) < 0) { - pr_err("failed to setup the kernel maps to resolve symbols\n"); - goto out_delete; - } - if (!full_paths) { if (getcwd(__cwd, sizeof(__cwd)) == NULL) { pr_err("failed to get the current directory\n"); diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h index 3f0d21b..ae036ec 100644 --- a/tools/perf/util/data_map.h +++ b/tools/perf/util/data_map.h @@ -23,8 +23,6 @@ struct perf_file_handler { void register_perf_file_handler(struct perf_file_handler *handler); int mmap_dispatch_perf_file(struct perf_header **pheader, const char *input_name, - const char *vmlinux_name, - bool try_vmlinux_path, int force, int full_paths, int *cwdlen, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1332f8e..271a160 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -253,12 +253,6 @@ static int perf_header__adds_write(struct perf_header *self, int fd) buildid_sec = &feat_sec[idx++]; - /* - * Read the kernel buildid nad the list of loaded modules with - * its build_ids: - */ - kernel_maps__init(NULL, false, true); - /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); err = dsos__write_buildid_table(fd); diff --git a/tools/perf/util/include/asm/bug.h b/tools/perf/util/include/asm/bug.h new file mode 100644 index 0000000..7fcc681 --- /dev/null +++ b/tools/perf/util/include/asm/bug.h @@ -0,0 +1,22 @@ +#ifndef _PERF_ASM_GENERIC_BUG_H +#define _PERF_ASM_GENERIC_BUG_H + +#define __WARN_printf(arg...) do { fprintf(stderr, arg); } while (0) + +#define WARN(condition, format...) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf(format); \ + unlikely(__ret_warn_on); \ +}) + +#define WARN_ONCE(condition, format...) ({ \ + static int __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once)) \ + if (WARN(!__warned, format)) \ + __warned = 1; \ + unlikely(__ret_warn_once); \ +}) +#endif diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 44d81d5..c4ca974 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -6,6 +6,7 @@ #include "debug.h" +#include #include #include #include @@ -37,6 +38,11 @@ unsigned int symbol__priv_size; static int vmlinux_path__nr_entries; static char **vmlinux_path; +static struct symbol_conf symbol_conf__defaults = { + .use_modules = true, + .try_vmlinux_path = true, +}; + static struct rb_root kernel_maps; static void dso__fixup_sym_end(struct dso *self) @@ -1166,7 +1172,9 @@ struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp, if (map) { ip = map->map_ip(map, ip); return map__find_symbol(map, ip, filter); - } + } else + WARN_ONCE(RB_EMPTY_ROOT(&kernel_maps), + "Empty kernel_maps, was symbol__init() called?\n"); return NULL; } @@ -1485,9 +1493,9 @@ size_t dsos__fprintf_buildid(FILE *fp) return ret; } -static int kernel_maps__create_kernel_map(const char *vmlinux_name) +static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) { - struct dso *kernel = dso__new(vmlinux_name ?: "[kernel.kallsyms]"); + struct dso *kernel = dso__new(conf->vmlinux_name ?: "[kernel.kallsyms]"); if (kernel == NULL) return -1; @@ -1577,18 +1585,21 @@ out_fail: return -1; } -int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, - bool use_modules) +static int kernel_maps__init(const struct symbol_conf *conf) { - if (try_vmlinux_path && vmlinux_path__init() < 0) + const struct symbol_conf *pconf = conf ?: &symbol_conf__defaults; + + symbol__priv_size = pconf->priv_size; + + if (pconf->try_vmlinux_path && vmlinux_path__init() < 0) return -1; - if (kernel_maps__create_kernel_map(vmlinux_name) < 0) { + if (kernel_maps__create_kernel_map(pconf) < 0) { vmlinux_path__exit(); return -1; } - if (use_modules && kernel_maps__create_module_maps() < 0) + if (pconf->use_modules && kernel_maps__create_module_maps() < 0) pr_debug("Failed to load list of modules in use, " "continuing...\n"); /* @@ -1598,8 +1609,8 @@ int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, return 0; } -void symbol__init(unsigned int priv_size) +int symbol__init(struct symbol_conf *conf) { elf_version(EV_CURRENT); - symbol__priv_size = priv_size; + return kernel_maps__init(conf); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 8c4d026..5538691 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -49,6 +49,13 @@ struct symbol { char name[0]; }; +struct symbol_conf { + unsigned short priv_size; + bool try_vmlinux_path, + use_modules; + const char *vmlinux_name; +}; + extern unsigned int symbol__priv_size; static inline void *symbol__priv(struct symbol *self) @@ -93,11 +100,9 @@ int sysfs__read_build_id(const char *filename, void *bf, size_t size); bool dsos__read_build_ids(void); int build_id__sprintf(u8 *self, int len, char *bf); -int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, - bool use_modules); size_t kernel_maps__fprintf(FILE *fp); -void symbol__init(unsigned int priv_size); +int symbol__init(struct symbol_conf *conf); extern struct list_head dsos; extern struct map *kernel_map; -- cgit v0.10.2 From 364794845cbc49e638b83d7ef739524291e1e961 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Nov 2009 12:05:16 -0200 Subject: perf tools: Introduce zalloc() for the common calloc(1, N) case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we type less characters and it looks more like the kzalloc kernel counterpart. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259071517-3242-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 5165fd1..8977317 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -127,11 +127,11 @@ int bench_mem_memcpy(int argc, const char **argv, return 1; } - dst = calloc(length, sizeof(char)); + dst = zalloc(length); if (!dst) die("memory allocation failed - maybe length is too large?\n"); - src = calloc(length, sizeof(char)); + src = zalloc(length); if (!src) die("memory allocation failed - maybe length is too large?\n"); diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 768f9c8..9f810b1 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -179,7 +179,7 @@ static void add_man_viewer(const char *name) while (*p) p = &((*p)->next); - *p = calloc(1, (sizeof(**p) + len + 1)); + *p = zalloc(sizeof(**p) + len + 1); strncpy((*p)->name, name, len); } @@ -194,7 +194,7 @@ static void do_add_man_viewer_info(const char *name, size_t len, const char *value) { - struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); + struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1); strncpy(new->name, name, len); new->info = strdup(value); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index d78a3d9..a2f6daf 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -309,9 +309,9 @@ static int synthesize_probe_event(struct probe_point *pp) { char *buf; int i, len, ret; - pp->probes[0] = buf = (char *)calloc(MAX_CMDLEN, sizeof(char)); + pp->probes[0] = buf = zalloc(MAX_CMDLEN); if (!buf) - die("Failed to allocate memory by calloc."); + die("Failed to allocate memory by zalloc."); ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); if (ret <= 0 || ret >= MAX_CMDLEN) goto error; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index dbf089b..19eb708 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -225,7 +225,7 @@ static void calibrate_sleep_measurement_overhead(void) static struct sched_atom * get_new_event(struct task_desc *task, u64 timestamp) { - struct sched_atom *event = calloc(1, sizeof(*event)); + struct sched_atom *event = zalloc(sizeof(*event)); unsigned long idx = task->nr_events; size_t size; @@ -293,7 +293,7 @@ add_sched_event_wakeup(struct task_desc *task, u64 timestamp, return; } - wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem)); + wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); sem_init(wakee_event->wait_sem, 0, 0); wakee_event->specific_wait = 1; event->wait_sem = wakee_event->wait_sem; @@ -323,7 +323,7 @@ static struct task_desc *register_pid(unsigned long pid, const char *comm) if (task) return task; - task = calloc(1, sizeof(*task)); + task = zalloc(sizeof(*task)); task->pid = pid; task->nr = nr_tasks; strcpy(task->comm, comm); @@ -962,9 +962,7 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data, static void thread_atoms_insert(struct thread *thread) { - struct work_atoms *atoms; - - atoms = calloc(sizeof(*atoms), 1); + struct work_atoms *atoms = zalloc(sizeof(*atoms)); if (!atoms) die("No memory"); @@ -996,9 +994,7 @@ add_sched_out_event(struct work_atoms *atoms, char run_state, u64 timestamp) { - struct work_atom *atom; - - atom = calloc(sizeof(*atom), 1); + struct work_atom *atom = zalloc(sizeof(*atom)); if (!atom) die("Non memory"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a212475..4c8653a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -181,7 +181,7 @@ static void parse_source(struct sym_entry *syme) return; if (syme->src == NULL) { - syme->src = calloc(1, sizeof(*source)); + syme->src = zalloc(sizeof(*source)); if (syme->src == NULL) return; pthread_mutex_init(&syme->src->lock, NULL); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 271a160..4b58656 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -63,7 +63,7 @@ int perf_header_attr__add_id(struct perf_header_attr *self, u64 id) */ struct perf_header *perf_header__new(void) { - struct perf_header *self = calloc(sizeof(*self), 1); + struct perf_header *self = zalloc(sizeof(*self)); if (self != NULL) { self->size = 1; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 0700274..9e5dbd6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -197,7 +197,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) if (id == config) { closedir(evt_dir); closedir(sys_dir); - path = calloc(1, sizeof(path)); + path = zalloc(sizeof(path)); path->system = malloc(MAX_EVENT_LENGTH); if (!path->system) { free(path); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c4ca974..8db85b4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -94,15 +94,14 @@ static void kernel_maps__fixup_end(void) static struct symbol *symbol__new(u64 start, u64 len, const char *name) { size_t namelen = strlen(name) + 1; - struct symbol *self = calloc(1, (symbol__priv_size + - sizeof(*self) + namelen)); - if (!self) + struct symbol *self = zalloc(symbol__priv_size + + sizeof(*self) + namelen); + if (self == NULL) return NULL; - if (symbol__priv_size) { - memset(self, 0, symbol__priv_size); + if (symbol__priv_size) self = ((void *)self) + symbol__priv_size; - } + self->start = start; self->end = len ? start + len - 1 : start; diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0f6d78c..1796625 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -11,7 +11,7 @@ static struct thread *last_match; static struct thread *thread__new(pid_t pid) { - struct thread *self = calloc(1, sizeof(*self)); + struct thread *self = zalloc(sizeof(*self)); if (self != NULL) { self->pid = pid; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index e1c623e..30c5517 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -302,6 +302,11 @@ extern int xdup(int fd); extern FILE *xfdopen(int fd, const char *mode); extern int xmkstemp(char *template); +static inline void *zalloc(size_t size) +{ + return calloc(1, size); +} + static inline size_t xsize_t(off_t len) { return (size_t)len; -- cgit v0.10.2 From 727dad10c17cbaade3cb6a56bd4863a4630f4d13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Nov 2009 12:05:17 -0200 Subject: perf tools: Remove unused wrapper routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And also make xrealloc and xmalloc weak symbols so that we don't have this problem: /usr/lib/gcc/x86_64-redhat-linux/4.4.1/../../../../lib64/libiberty.a(xmalloc.o): In function `xrealloc': (.text+0xc0): multiple definition of `xrealloc' libperf.a(wrapper.o):/home/acme_unencrypted/git/linux-2.6-tip/tools/perf/util/wrapper.c:67: first defined here collect2: ld returned 1 exit status Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259071517-3242-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 30c5517..c673d88 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -290,17 +290,10 @@ static inline char *gitstrchrnul(const char *s, int c) * Wrappers: */ extern char *xstrdup(const char *str); -extern void *xmalloc(size_t size); +extern void *xmalloc(size_t size) __attribute__((weak)); extern void *xmemdupz(const void *data, size_t len); extern char *xstrndup(const char *str, size_t len); -extern void *xrealloc(void *ptr, size_t size); -extern void *xcalloc(size_t nmemb, size_t size); -extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); -extern ssize_t xread(int fd, void *buf, size_t len); -extern ssize_t xwrite(int fd, const void *buf, size_t len); -extern int xdup(int fd); -extern FILE *xfdopen(int fd, const char *mode); -extern int xmkstemp(char *template); +extern void *xrealloc(void *ptr, size_t size) __attribute__((weak)); static inline void *zalloc(size_t size) { diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 4574ac2..bf44ca8 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -79,43 +79,12 @@ void *xrealloc(void *ptr, size_t size) return ret; } -void *xcalloc(size_t nmemb, size_t size) -{ - void *ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) { - release_pack_memory(nmemb * size, -1); - ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) - die("Out of memory, calloc failed"); - } - return ret; -} - -void *xmmap(void *start, size_t length, - int prot, int flags, int fd, off_t offset) -{ - void *ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) { - if (!length) - return NULL; - release_pack_memory(length, fd); - ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) - die("Out of memory? mmap failed: %s", strerror(errno)); - } - return ret; -} - /* * xread() is the same a read(), but it automatically restarts read() * operations with a recoverable error (EAGAIN and EINTR). xread() * DOES NOT GUARANTEE that "len" bytes is read even if the data is available. */ -ssize_t xread(int fd, void *buf, size_t len) +static ssize_t xread(int fd, void *buf, size_t len) { ssize_t nr; while (1) { @@ -131,7 +100,7 @@ ssize_t xread(int fd, void *buf, size_t len) * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT * GUARANTEE that "len" bytes is written even if the operation is successful. */ -ssize_t xwrite(int fd, const void *buf, size_t len) +static ssize_t xwrite(int fd, const void *buf, size_t len) { ssize_t nr; while (1) { @@ -179,29 +148,3 @@ ssize_t write_in_full(int fd, const void *buf, size_t count) return total; } - -int xdup(int fd) -{ - int ret = dup(fd); - if (ret < 0) - die("dup failed: %s", strerror(errno)); - return ret; -} - -FILE *xfdopen(int fd, const char *mode) -{ - FILE *stream = fdopen(fd, mode); - if (stream == NULL) - die("Out of memory? fdopen failed: %s", strerror(errno)); - return stream; -} - -int xmkstemp(char *template) -{ - int fd; - - fd = mkstemp(template); - if (fd < 0) - die("Unable to create temporary file: %s", strerror(errno)); - return fd; -} -- cgit v0.10.2 From fcf1203a919c3a3d212c0ed01f5240fd592bf5ae Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 24 Nov 2009 13:01:52 -0200 Subject: perf symbols: Rename find_symbol routines to find_function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Paving the way for supporting variable in adition to function symbols. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259074912-5924-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cd97c2b..18ac5ea 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -162,7 +162,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (event->header.misc & PERF_RECORD_MISC_KERNEL) { level = 'k'; - sym = kernel_maps__find_symbol(ip, &map, symbol_filter); + sym = kernel_maps__find_function(ip, &map, symbol_filter); dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); } else if (event->header.misc & PERF_RECORD_MISC_USER) { @@ -171,7 +171,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (map != NULL) { got_map: ip = map->map_ip(map, ip); - sym = map__find_symbol(map, ip, symbol_filter); + sym = map__find_function(map, ip, symbol_filter); } else { /* * If this is outside of all known maps, diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 330dbc7..35722fa 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -445,8 +445,7 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) if (is_caller) { addr = data->call_site; if (!raw_ip) - sym = kernel_maps__find_symbol(addr, - NULL, NULL); + sym = kernel_maps__find_function(addr, NULL, NULL); } else addr = data->ptr; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0ee3d05..e4b1004 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -448,14 +448,14 @@ got_map: * trick of looking in the whole kernel symbol list. */ if ((long long)ip < 0) - return kernel_maps__find_symbol(ip, mapp, NULL); + return kernel_maps__find_function(ip, mapp, NULL); } dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip); *ipp = ip; - return map ? map__find_symbol(map, ip, NULL) : NULL; + return map ? map__find_function(map, ip, NULL) : NULL; } static int call__match(struct symbol *sym) @@ -495,7 +495,7 @@ static struct symbol **resolve_callchain(struct thread *thread, case PERF_CONTEXT_HV: break; case PERF_CONTEXT_KERNEL: - sym = kernel_maps__find_symbol(ip, NULL, NULL); + sym = kernel_maps__find_function(ip, NULL, NULL); break; default: sym = resolve_symbol(thread, NULL, &ip); @@ -715,7 +715,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (cpumode == PERF_RECORD_MISC_KERNEL) { level = 'k'; - sym = kernel_maps__find_symbol(ip, &map, NULL); + sym = kernel_maps__find_function(ip, &map, NULL); dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); } else if (cpumode == PERF_RECORD_MISC_USER) { diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4c8653a..ded6cf6 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -948,7 +948,7 @@ static void event__process_sample(const event_t *self, int counter) map = thread__find_map(thread, ip); if (map != NULL) { ip = map->map_ip(map, ip); - sym = map__find_symbol(map, ip, symbol_filter); + sym = map__find_function(map, ip, symbol_filter); if (sym == NULL) return; userspace_samples++; @@ -968,7 +968,7 @@ static void event__process_sample(const event_t *self, int counter) if (hide_kernel_symbols) return; - sym = kernel_maps__find_symbol(ip, &map, symbol_filter); + sym = kernel_maps__find_function(ip, &map, symbol_filter); if (sym == NULL) return; break; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f1e3926..882a953 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -119,9 +119,10 @@ void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); -struct symbol *map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter); -void map__fixup_start(struct map *self); -void map__fixup_end(struct map *self); +struct symbol *map__find_function(struct map *self, u64 ip, + symbol_filter_t filter); +void map__fixup_start(struct map *self, struct rb_root *symbols); +void map__fixup_end(struct map *self, struct rb_root *symbols); int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); void event__synthesize_threads(int (*process)(event_t *event)); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 0941232..41c5c4a 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -80,18 +80,18 @@ void map__delete(struct map *self) free(self); } -void map__fixup_start(struct map *self) +void map__fixup_start(struct map *self, struct rb_root *symbols) { - struct rb_node *nd = rb_first(&self->dso->syms); + struct rb_node *nd = rb_first(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); self->start = sym->start; } } -void map__fixup_end(struct map *self) +void map__fixup_end(struct map *self, struct rb_root *symbols) { - struct rb_node *nd = rb_last(&self->dso->syms); + struct rb_node *nd = rb_last(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); self->end = sym->end; @@ -100,8 +100,8 @@ void map__fixup_end(struct map *self) #define DSO__DELETED "(deleted)" -struct symbol * -map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter) +struct symbol *map__find_function(struct map *self, u64 ip, + symbol_filter_t filter) { if (!self->dso->loaded) { int nr = dso__load(self->dso, self, filter); @@ -136,7 +136,7 @@ map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter) } } - return self->dso->find_symbol(self->dso, ip); + return self->dso->find_function(self->dso, ip); } struct map *map__clone(struct map *self) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8db85b4..4ed379b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -45,9 +45,9 @@ static struct symbol_conf symbol_conf__defaults = { static struct rb_root kernel_maps; -static void dso__fixup_sym_end(struct dso *self) +static void symbols__fixup_end(struct rb_root *self) { - struct rb_node *nd, *prevnd = rb_first(&self->syms); + struct rb_node *nd, *prevnd = rb_first(self); struct symbol *curr, *prev; if (prevnd == NULL) @@ -144,8 +144,8 @@ struct dso *dso__new(const char *name) strcpy(self->name, name); dso__set_long_name(self, self->name); self->short_name = self->name; - self->syms = RB_ROOT; - self->find_symbol = dso__find_symbol; + self->functions = RB_ROOT; + self->find_function = dso__find_function; self->slen_calculated = 0; self->origin = DSO__ORIG_NOT_FOUND; self->loaded = 0; @@ -155,22 +155,22 @@ struct dso *dso__new(const char *name) return self; } -static void dso__delete_symbols(struct dso *self) +static void symbols__delete(struct rb_root *self) { struct symbol *pos; - struct rb_node *next = rb_first(&self->syms); + struct rb_node *next = rb_first(self); while (next) { pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &self->syms); + rb_erase(&pos->rb_node, self); symbol__delete(pos); } } void dso__delete(struct dso *self) { - dso__delete_symbols(self); + symbols__delete(&self->functions); if (self->long_name != self->name) free(self->long_name); free(self); @@ -182,9 +182,9 @@ void dso__set_build_id(struct dso *self, void *build_id) self->has_build_id = 1; } -static void dso__insert_symbol(struct dso *self, struct symbol *sym) +static void symbols__insert(struct rb_root *self, struct symbol *sym) { - struct rb_node **p = &self->syms.rb_node; + struct rb_node **p = &self->rb_node; struct rb_node *parent = NULL; const u64 ip = sym->start; struct symbol *s; @@ -198,17 +198,17 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym) p = &(*p)->rb_right; } rb_link_node(&sym->rb_node, parent, p); - rb_insert_color(&sym->rb_node, &self->syms); + rb_insert_color(&sym->rb_node, self); } -struct symbol *dso__find_symbol(struct dso *self, u64 ip) +static struct symbol *symbols__find(struct rb_root *self, u64 ip) { struct rb_node *n; if (self == NULL) return NULL; - n = self->syms.rb_node; + n = self->rb_node; while (n) { struct symbol *s = rb_entry(n, struct symbol, rb_node); @@ -224,6 +224,11 @@ struct symbol *dso__find_symbol(struct dso *self, u64 ip) return NULL; } +struct symbol *dso__find_function(struct dso *self, u64 ip) +{ + return symbols__find(&self->functions, ip); +} + int build_id__sprintf(u8 *self, int len, char *bf) { char *bid = bf; @@ -253,9 +258,9 @@ size_t dso__fprintf(struct dso *self, FILE *fp) size_t ret = fprintf(fp, "dso: %s (", self->short_name); ret += dso__fprintf_buildid(self, fp); - ret += fprintf(fp, ")\n"); + ret += fprintf(fp, ")\nFunctions:\n"); - for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { + for (nd = rb_first(&self->functions); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); ret += symbol__fprintf(pos, fp); } @@ -320,7 +325,7 @@ static int kernel_maps__load_all_kallsyms(void) * kernel_maps__split_kallsyms, when we have split the * maps per module */ - dso__insert_symbol(kernel_map->dso, sym); + symbols__insert(&kernel_map->dso->functions, sym); } free(line); @@ -344,7 +349,7 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) struct map *map = kernel_map; struct symbol *pos; int count = 0; - struct rb_node *next = rb_first(&kernel_map->dso->syms); + struct rb_node *next = rb_first(&kernel_map->dso->functions); int kernel_range = 0; while (next) { @@ -394,12 +399,13 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) } if (filter && filter(map, pos)) { - rb_erase(&pos->rb_node, &kernel_map->dso->syms); + rb_erase(&pos->rb_node, &kernel_map->dso->functions); symbol__delete(pos); } else { if (map != kernel_map) { - rb_erase(&pos->rb_node, &kernel_map->dso->syms); - dso__insert_symbol(map->dso, pos); + rb_erase(&pos->rb_node, + &kernel_map->dso->functions); + symbols__insert(&map->dso->functions, pos); } count++; } @@ -414,7 +420,7 @@ static int kernel_maps__load_kallsyms(symbol_filter_t filter) if (kernel_maps__load_all_kallsyms()) return -1; - dso__fixup_sym_end(kernel_map->dso); + symbols__fixup_end(&kernel_map->dso->functions); kernel_map->dso->origin = DSO__ORIG_KERNEL; return kernel_maps__split_kallsyms(filter); @@ -485,7 +491,7 @@ static int dso__load_perf_map(struct dso *self, struct map *map, if (filter && filter(map, sym)) symbol__delete(sym); else { - dso__insert_symbol(self, sym); + symbols__insert(&self->functions, sym); nr_syms++; } } @@ -683,7 +689,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, struct map *map, if (filter && filter(map, f)) symbol__delete(f); else { - dso__insert_symbol(self, f); + symbols__insert(&self->functions, f); ++nr; } } @@ -705,7 +711,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, struct map *map, if (filter && filter(map, f)) symbol__delete(f); else { - dso__insert_symbol(self, f); + symbols__insert(&self->functions, f); ++nr; } } @@ -879,7 +885,7 @@ new_symbol: if (filter && filter(curr_map, f)) symbol__delete(f); else { - dso__insert_symbol(curr_dso, f); + symbols__insert(&curr_dso->functions, f); nr++; } } @@ -888,7 +894,7 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) - dso__fixup_sym_end(self); + symbols__fixup_end(&self->functions); err = nr; out_elf_end: elf_end(elf); @@ -1160,8 +1166,8 @@ static void kernel_maps__insert(struct map *map) maps__insert(&kernel_maps, map); } -struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp, - symbol_filter_t filter) +struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, + symbol_filter_t filter) { struct map *map = maps__find(&kernel_maps, ip); @@ -1170,7 +1176,7 @@ struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp, if (map) { ip = map->map_ip(map, ip); - return map__find_symbol(map, ip, filter); + return map__find_function(map, ip, filter); } else WARN_ONCE(RB_EMPTY_ROOT(&kernel_maps), "Empty kernel_maps, was symbol__init() called?\n"); @@ -1432,8 +1438,8 @@ do_kallsyms: if (err > 0) { out_fixup: - map__fixup_start(map); - map__fixup_end(map); + map__fixup_start(map, &map->dso->functions); + map__fixup_end(map, &map->dso->functions); } return err; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5538691..65846d0 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -65,8 +65,8 @@ static inline void *symbol__priv(struct symbol *self) struct dso { struct list_head node; - struct rb_root syms; - struct symbol *(*find_symbol)(struct dso *, u64 ip); + struct rb_root functions; + struct symbol *(*find_function)(struct dso *, u64 ip); u8 adjust_symbols:1; u8 slen_calculated:1; u8 loaded:1; @@ -83,7 +83,7 @@ struct dso { struct dso *dso__new(const char *name); void dso__delete(struct dso *self); -struct symbol *dso__find_symbol(struct dso *self, u64 ip); +struct symbol *dso__find_function(struct dso *self, u64 ip); struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index e4b8d43..74cba64 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -26,8 +26,8 @@ size_t threads__fprintf(FILE *fp); void maps__insert(struct rb_root *maps, struct map *map); struct map *maps__find(struct rb_root *maps, u64 ip); -struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp, - symbol_filter_t filter); +struct symbol *kernel_maps__find_function(const u64 ip, struct map **mapp, + symbol_filter_t filter); struct map *kernel_maps__find_by_dso_name(const char *name); static inline struct map *thread__find_map(struct thread *self, u64 ip) -- cgit v0.10.2 From 1261a02a0c0ab8e643125705f0d1d83e5090e4d1 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 24 Nov 2009 05:27:18 -0800 Subject: perf_events, x86: Fix validate_event bug The validate_event() was failing on valid event combinations. The function was assuming that if x86_schedule_event() returned 0, it meant error. But x86_schedule_event() returns the counter index and 0 is a perfectly valid value. An error is returned if the function returns a negative value. Furthermore, validate_event() was also failing for event groups because the event->pmu was not set until after hw_perf_event_init(). Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: paulus@samba.org Cc: perfmon2-devel@lists.sourceforge.net Cc: eranian@gmail.com LKML-Reference: <4b0bdf36.1818d00a.07cc.25ae@mx.google.com> Signed-off-by: Ingo Molnar -- arch/x86/kernel/cpu/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bd87430..c1bbed1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2229,10 +2229,10 @@ validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) { struct hw_perf_event fake_event = event->hw; - if (event->pmu != &pmu) + if (event->pmu && event->pmu != &pmu) return 0; - return x86_schedule_event(cpuc, &fake_event); + return x86_schedule_event(cpuc, &fake_event) >= 0; } static int validate_group(struct perf_event *event) -- cgit v0.10.2 From fe6126722718e51fba4879517c11ac12d9775bcc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 24 Nov 2009 20:38:22 +0100 Subject: perf_events: Fix bad software/trace event recursion counting Commit 4ed7c92d68a5387ba5f7030dc76eab03558e27f5 (perf_events: Undo some recursion damage) has introduced a bad reference counting of the recursion context. putting the context behaves like getting it, dropping every software/trace events after the first one in a context. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Arjan van de Ven Cc: Li Zefan Cc: Steven Rostedt LKML-Reference: <1259091502-5171-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index accfd7bf..35df94e 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -3914,7 +3914,7 @@ void perf_swevent_put_recursion_context(int rctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); barrier(); - cpuctx->recursion[rctx]++; + cpuctx->recursion[rctx]--; put_cpu_var(perf_cpu_context); } EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); -- cgit v0.10.2 From ff038f5c37c2070829004a0678372766c2b32180 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:27:27 -0500 Subject: tracing: Create new TRACE_EVENT_TEMPLATE There are some places in the kernel that define several tracepoints and they are all identical besides the name. The code to enable, disable and record is created for every trace point even if most of the code is identical. This patch adds TRACE_EVENT_TEMPLATE that lets the developer create a template TRACE_EVENT and create trace points with DEFINE_EVENT, which is based off of a given template. Each trace point used by this will share most of the code, and bring down the size of the kernel when there are several duplicate events. Usage is: TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print); Which would be the same as defining a normal TRACE_EVENT. To create the trace events that the trace points will use: DEFINE_EVENT(template, name, proto, args) is done. The template is the name of the TRACE_EVENT_TEMPLATE to use. The name is the name of the trace point. The parameters proto and args must be the same as the proto and args of the template. If they are not the same, then a compile error will result. I tried hard removing this duplication but the C preprocessor is not powerful enough (or my CPP magic experience points is not at a high enough level) to not need them. A lot of trace events are coming in with new XFS development. Most of the trace points are identical except for the name. The following shows the advantage of having TRACE_EVENT_TEMPLATE: $ size fs/xfs/xfs.o.* text data bss dec hex filename 452114 2788 3520 458422 6feb6 fs/xfs/xfs.o.old 638482 38116 3744 680342 a6196 fs/xfs/xfs.o.template 996954 38116 4480 1039550 fdcbe fs/xfs/xfs.o.trace xfs.o.old is without any tracepoints. xfs.o.template uses the new TRACE_EVENT_TEMPLATE. xfs.o.trace uses the current TRACE_EVENT macros. Requested-by: Christoph Hellwig Signed-off-by: Steven Rostedt diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 2aac8a8..88a5b5a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,6 +280,10 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. */ +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT(template, name, proto, args) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT_FN(name, proto, args, struct, \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 2a4b3bf..24498581 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -31,6 +31,10 @@ assign, print, reg, unreg) \ DEFINE_TRACE_FN(name, reg, unreg) +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -63,6 +67,8 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN +#undef TRACE_EVENT_TEMPLATE +#undef DEFINE_EVENT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c3417c1..2969f65 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -18,6 +18,26 @@ #include +/* + * TRACE_EVENT_TEMPLATE can be used to add a generic function + * handlers for events. That is, if all events have the same + * parameters and just have distinct trace points. + * Each tracepoint can be defined with DEFINE_EVENT and that + * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * + * TRACE_EVENT is a one to one mapping between tracepoint and template. + */ +#undef TRACE_EVENT +#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ + TRACE_EVENT_TEMPLATE(name, \ + PARAMS(proto), \ + PARAMS(args), \ + PARAMS(tstruct), \ + PARAMS(assign), \ + PARAMS(print)); \ + DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args)); + + #undef __field #define __field(type, item) type item; @@ -36,13 +56,15 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - char __data[0]; \ - }; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + char __data[0]; \ + }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name #undef __cpparg @@ -89,12 +111,15 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -170,8 +195,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -186,6 +211,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -255,10 +283,11 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ -ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +ftrace_raw_output_id_##call(int event_id, const char *name, \ + struct trace_iterator *iter, int flags) \ { \ struct trace_seq *s = &iter->seq; \ struct ftrace_raw_##call *field; \ @@ -268,7 +297,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ entry = iter->ent; \ \ - if (entry->type != event_##call.id) { \ + if (entry->type != event_id) { \ WARN_ON_ONCE(1); \ return TRACE_TYPE_UNHANDLED; \ } \ @@ -277,14 +306,25 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ p = &get_cpu_var(ftrace_event_seq); \ trace_seq_init(p); \ - ret = trace_seq_printf(s, #call ": " print); \ + ret = trace_seq_printf(s, "%s: ", name); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ put_cpu(); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ +static enum print_line_t \ +ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ +{ \ + return ftrace_raw_output_id_##template(event_##name.id, \ + #name, iter, flags); \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -318,8 +358,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -335,6 +375,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ return ret; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -361,10 +404,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ __data_size += (len) * sizeof(type); #undef __string -#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) \ +#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -376,6 +419,9 @@ static inline int ftrace_get_offsets_##call( \ return __data_size; \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -397,19 +443,22 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, name, proto, args) \ \ -static void ftrace_profile_##call(proto); \ +static void ftrace_profile_##name(proto); \ \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *unused)\ +static int ftrace_profile_enable_##name(struct ftrace_event_call *unused)\ { \ - return register_trace_##call(ftrace_profile_##call); \ + return register_trace_##name(ftrace_profile_##name); \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ +static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ { \ - unregister_trace_##call(ftrace_profile_##call); \ + unregister_trace_##name(ftrace_profile_##name); \ } #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) @@ -550,15 +599,13 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ - \ -static struct ftrace_event_call event_##call; \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ \ -static void ftrace_raw_event_##call(proto) \ +static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - struct ftrace_event_call *event_call = &event_##call; \ struct ring_buffer_event *event; \ struct ftrace_raw_##call *entry; \ struct ring_buffer *buffer; \ @@ -572,7 +619,7 @@ static void ftrace_raw_event_##call(proto) \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ \ event = trace_current_buffer_lock_reserve(&buffer, \ - event_##call.id, \ + event_call->id, \ sizeof(*entry) + __data_size, \ irq_flags, pc); \ if (!event) \ @@ -587,6 +634,14 @@ static void ftrace_raw_event_##call(proto) \ if (!filter_current_check_discard(buffer, event_call, entry, event)) \ trace_nowake_buffer_unlock_commit(buffer, \ event, irq_flags, pc); \ +} + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + \ +static void ftrace_raw_event_##call(proto) \ +{ \ + ftrace_raw_event_id_##template(&event_##call, args); \ } \ \ static int ftrace_raw_reg_event_##call(struct ftrace_event_call *unused)\ @@ -630,8 +685,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = ftrace_raw_init_event_##call, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ - .show_format = ftrace_format_##call, \ - .define_fields = ftrace_define_fields_##call, \ + .show_format = ftrace_format_##template, \ + .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -719,14 +774,15 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -static void ftrace_profile_##call(proto) \ +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +static void \ +ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ + proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ extern int perf_swevent_get_recursion_context(void); \ extern void perf_swevent_put_recursion_context(int rctx); \ - struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ @@ -789,6 +845,15 @@ end_recursion: \ \ } +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_event_call *event_call = &event_##call; \ + \ + ftrace_profile_templ_##template(event_call, args); \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v0.10.2 From e5bc9721684e9412f3e0465222f317c362a8ab47 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:36:26 -0500 Subject: tracing: Create new DEFINE_EVENT_PRINT After creating the TRACE_EVENT_TEMPLATE I started to look at other trace points to see what duplication was made. I noticed that there are several trace points where they are almost identical except for the name and the output format. Since TRACE_EVENT_TEMPLATE was successful in bringing down the size of trace events, I added a DEFINE_EVENT_PRINT. DEFINE_EVENT_PRINT is used just like DEFINE_EVENT is. That is, the DEFINE_EVENT_PRINT also uses a TRACE_EVENT_TEMPLATE, but it allows the developer to overwrite the print format. If there are two or more TRACE_EVENTS that are identical except for the name and print, then they can be converted to use a TRACE_EVENT_TEMPLATE. Since the TRACE_EVENT_TEMPLATE already does the print output, the first trace event would have its print format held in the TRACE_EVENT_TEMPLATE and be defined with a DEFINE_EVENT. The rest will use the DEFINE_EVENT_PRINT and override the print format. Converting the sched trace points to both DEFINE_EVENT and DEFINE_EVENT_PRINT. Five were converted to DEFINE_EVENT and two were converted to DEFINE_EVENT_PRINT. I was able to get the following: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is the scheduler compiled with no trace points. sched.o-templ is with the use of DEFINE_EVENT and DEFINE_EVENT_PRINT sched.o-trace is the current trace events. Signed-off-by: Steven Rostedt diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 88a5b5a..7063383 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -283,6 +283,8 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 24498581..5d7d855 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -35,6 +35,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ DEFINE_TRACE(name) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_TRACE(name) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) @@ -69,6 +73,7 @@ #undef TRACE_EVENT_FN #undef TRACE_EVENT_TEMPLATE #undef DEFINE_EVENT +#undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 2969f65..b046177 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -67,6 +67,10 @@ #define DEFINE_EVENT(template, name, proto, args) \ static struct ftrace_event_call event_##name +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #undef __cpparg #define __cpparg(arg...) arg @@ -120,6 +124,10 @@ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -198,15 +206,28 @@ #undef TRACE_EVENT_TEMPLATE #define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ +ftrace_format_setup_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ \ tstruct; \ \ - trace_seq_printf(s, "\nprint fmt: " print); \ + return ret; \ +} \ + \ +static int \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##call(unused, s); \ + if (!ret) \ + return ret; \ + \ + ret = trace_seq_printf(s, "\nprint fmt: " print); \ \ return ret; \ } @@ -214,6 +235,23 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ +static int \ +ftrace_format_##name(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ +{ \ + int ret = 0; \ + \ + ret = ftrace_format_setup_##template(unused, s); \ + if (!ret) \ + return ret; \ + \ + trace_seq_printf(s, "\nprint fmt: " print); \ + \ + return ret; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -325,6 +363,38 @@ ftrace_raw_output_##name(struct trace_iterator *iter, int flags) \ #name, iter, flags); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ +static enum print_line_t \ +ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ +{ \ + struct trace_seq *s = &iter->seq; \ + struct ftrace_raw_##template *field; \ + struct trace_entry *entry; \ + struct trace_seq *p; \ + int ret; \ + \ + entry = iter->ent; \ + \ + if (entry->type != event_##call.id) { \ + WARN_ON_ONCE(1); \ + return TRACE_TYPE_UNHANDLED; \ + } \ + \ + field = (typeof(field))entry; \ + \ + p = &get_cpu_var(ftrace_event_seq); \ + trace_seq_init(p); \ + ret = trace_seq_printf(s, "%s: ", #call); \ + if (ret) \ + ret = trace_seq_printf(s, print); \ + put_cpu(); \ + if (!ret) \ + return TRACE_TYPE_PARTIAL_LINE; \ + \ + return TRACE_TYPE_HANDLED; \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #undef __field_ext @@ -378,6 +448,10 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -422,6 +496,10 @@ static inline int ftrace_get_offsets_##call( \ #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #ifdef CONFIG_EVENT_PROFILE @@ -461,6 +539,10 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ unregister_trace_##name(ftrace_profile_##name); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif @@ -674,7 +756,19 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ return 0; \ -} \ +} + +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TRACE_EVENT_TEMPLATE +#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) + +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -690,6 +784,23 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ _TRACE_PROFILE_INIT(call) \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ + \ +static struct ftrace_event_call __used \ +__attribute__((__aligned__(4))) \ +__attribute__((section("_ftrace_events"))) event_##call = { \ + .name = #call, \ + .system = __stringify(TRACE_SYSTEM), \ + .event = &ftrace_event_type_##call, \ + .raw_init = ftrace_raw_init_event_##call, \ + .regfunc = ftrace_raw_reg_event_##call, \ + .unregfunc = ftrace_raw_unreg_event_##call, \ + .show_format = ftrace_format_##call, \ + .define_fields = ftrace_define_fields_##template, \ + _TRACE_PROFILE_INIT(call) \ +} + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) /* @@ -854,6 +965,10 @@ static void ftrace_profile_##call(proto) \ ftrace_profile_templ_##template(event_call, args); \ } +#undef DEFINE_EVENT_PRINT +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) #endif /* CONFIG_EVENT_PROFILE */ -- cgit v0.10.2 From 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 18 Nov 2009 20:48:08 -0500 Subject: tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT Converting some of the scheduler trace events to use the TRACE_EVENT_TEMPLATE, DEFINE_EVENT and DEFINE_EVENT_PRINT helped to save some space: $ size kernel/sched.o-* text data bss dec hex filename 79299 6776 2520 88595 15a13 kernel/sched.o-notrace 101941 11896 2584 116421 1c6c5 kernel/sched.o-templ 104779 11896 2584 119259 1d1db kernel/sched.o-trace sched.o-notrace is without any tracepoints compiled sched.o-templ is with this patch sched.o-trace is the tracepoints before this patch The trace events converted to DEFINE_EVENT: sched_wakeup, sched_wakeup_new, sched_process_free, sched_process_exit, and sched_stat_wait. The trace events converted to DEFINE_EVENT_PRINT: sched_stat_sleep and sched_stat_iowait. Note, since the TRACE_EVENT_TEMPLATE always uses a print, the sched_stat_wait print format is defined in the template and this template is used by sched_stat_sleep and sched_stat_iowait. But the later two override the print format. Signed-off-by: Steven Rostedt diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b50b985..238f74b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup, +TRACE_EVENT_TEMPLATE(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -110,38 +110,19 @@ TRACE_EVENT(sched_wakeup, __entry->success, __entry->target_cpu) ); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); + /* * Tracepoint for waking up a new task: * * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) - __field( int, target_cpu ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - __entry->target_cpu = task_cpu(p); - ), - - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->target_cpu) -); +DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, + TP_PROTO(struct rq *rq, struct task_struct *p, int success), + TP_ARGS(rq, p, success)); /* * Tracepoint for task switches, performed by the scheduler: @@ -216,10 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, +TRACE_EVENT_TEMPLATE(sched_process_template, TP_PROTO(struct task_struct *p), @@ -242,29 +220,19 @@ TRACE_EVENT(sched_process_free, ); /* - * Tracepoint for a task exiting: + * Tracepoint for freeing a task: */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), +DEFINE_EVENT(sched_process_template, sched_process_free, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ), - - TP_printk("comm=%s pid=%d prio=%d", - __entry->comm, __entry->pid, __entry->prio) -); +/* + * Tracepoint for a task exiting: + */ +DEFINE_EVENT(sched_process_template, sched_process_exit, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); /* * Tracepoint for a waiting task: @@ -348,12 +316,7 @@ TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ - -/* - * Tracepoint for accounting wait time (time the task is runnable - * but not actually running due to scheduler contention). - */ -TRACE_EVENT(sched_stat_wait, +TRACE_EVENT_TEMPLATE(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), @@ -379,6 +342,37 @@ TRACE_EVENT(sched_stat_wait, (unsigned long long)__entry->delay) ); + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +DEFINE_EVENT(sched_stat_template, sched_stat_wait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d sleep: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay), + TP_printk("task: %s:%d iowait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay)); + /* * Tracepoint for accounting runtime (time the task is executing * on a CPU). @@ -412,66 +406,6 @@ TRACE_EVENT(sched_stat_runtime, (unsigned long long)__entry->vruntime) ); -/* - * Tracepoint for accounting sleep time (time the task is not runnable, - * including iowait, see below). - */ -TRACE_EVENT(sched_stat_sleep, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - -/* - * Tracepoint for accounting iowait time (time the task is not runnable - * due to waiting on IO to complete). - */ -TRACE_EVENT(sched_stat_iowait, - - TP_PROTO(struct task_struct *tsk, u64 delay), - - TP_ARGS(tsk, delay), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( u64, delay ) - ), - - TP_fast_assign( - memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); - __entry->pid = tsk->pid; - __entry->delay = delay; - ) - TP_perf_assign( - __perf_count(delay); - ), - - TP_printk("comm=%s pid=%d delay=%Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay) -); - #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v0.10.2 From 7539cf4b92be4aecc573ea962135f246a7a33401 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 24 Nov 2009 22:00:05 +0900 Subject: TOMOYO: Add recursive directory matching operator support. TOMOYO 1.7.1 has recursive directory matching operator support. I want to add it to TOMOYO for Linux 2.6.33 . ---------- [PATCH] TOMOYO: Add recursive directory matching operator support. This patch introduces new operator /\{dir\}/ which matches '/' + 'One or more repetitions of dir/' (e.g. /dir/ /dir/dir/ /dir/dir/dir/ ). Signed-off-by: Tetsuo Handa Acked-by: John Johansen Signed-off-by: James Morris diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 3c8bd8e..e0d0354 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -187,6 +187,8 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, const s8 pattern_type, const s8 end_type, const char *function) { + const char *const start = filename; + bool in_repetition = false; bool contains_pattern = false; unsigned char c; unsigned char d; @@ -212,9 +214,13 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, if (c == '/') goto out; } - while ((c = *filename++) != '\0') { + while (1) { + c = *filename++; + if (!c) + break; if (c == '\\') { - switch ((c = *filename++)) { + c = *filename++; + switch (c) { case '\\': /* "\\" */ continue; case '$': /* "\$" */ @@ -231,6 +237,22 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, break; /* Must not contain pattern */ contains_pattern = true; continue; + case '{': /* "/\{" */ + if (filename - 3 < start || + *(filename - 3) != '/') + break; + if (pattern_type == -1) + break; /* Must not contain pattern */ + contains_pattern = true; + in_repetition = true; + continue; + case '}': /* "\}/" */ + if (*filename != '/') + break; + if (!in_repetition) + break; + in_repetition = false; + continue; case '0': /* "\ooo" */ case '1': case '2': @@ -246,6 +268,8 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, continue; /* pattern is not \000 */ } goto out; + } else if (in_repetition && c == '/') { + goto out; } else if (tomoyo_is_invalid(c)) { goto out; } @@ -254,6 +278,8 @@ bool tomoyo_is_correct_path(const char *filename, const s8 start_type, if (!contains_pattern) goto out; } + if (in_repetition) + goto out; return true; out: printk(KERN_DEBUG "%s: Invalid pathname '%s'\n", function, @@ -360,33 +386,6 @@ struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname) } /** - * tomoyo_path_depth - Evaluate the number of '/' in a string. - * - * @pathname: The string to evaluate. - * - * Returns path depth of the string. - * - * I score 2 for each of the '/' in the @pathname - * and score 1 if the @pathname ends with '/'. - */ -static int tomoyo_path_depth(const char *pathname) -{ - int i = 0; - - if (pathname) { - const char *ep = pathname + strlen(pathname); - if (pathname < ep--) { - if (*ep != '/') - i++; - while (pathname <= ep) - if (*ep-- == '/') - i += 2; - } - } - return i; -} - -/** * tomoyo_const_part_length - Evaluate the initial length without a pattern in a token. * * @filename: The string to evaluate. @@ -444,11 +443,10 @@ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr) ptr->is_dir = len && (name[len - 1] == '/'); ptr->is_patterned = (ptr->const_len < len); ptr->hash = full_name_hash(name, len); - ptr->depth = tomoyo_path_depth(name); } /** - * tomoyo_file_matches_to_pattern2 - Pattern matching without '/' character + * tomoyo_file_matches_pattern2 - Pattern matching without '/' character * and "\-" pattern. * * @filename: The start of string to check. @@ -458,10 +456,10 @@ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr) * * Returns true if @filename matches @pattern, false otherwise. */ -static bool tomoyo_file_matches_to_pattern2(const char *filename, - const char *filename_end, - const char *pattern, - const char *pattern_end) +static bool tomoyo_file_matches_pattern2(const char *filename, + const char *filename_end, + const char *pattern, + const char *pattern_end) { while (filename < filename_end && pattern < pattern_end) { char c; @@ -519,7 +517,7 @@ static bool tomoyo_file_matches_to_pattern2(const char *filename, case '*': case '@': for (i = 0; i <= filename_end - filename; i++) { - if (tomoyo_file_matches_to_pattern2( + if (tomoyo_file_matches_pattern2( filename + i, filename_end, pattern + 1, pattern_end)) return true; @@ -550,7 +548,7 @@ static bool tomoyo_file_matches_to_pattern2(const char *filename, j++; } for (i = 1; i <= j; i++) { - if (tomoyo_file_matches_to_pattern2( + if (tomoyo_file_matches_pattern2( filename + i, filename_end, pattern + 1, pattern_end)) return true; @@ -567,7 +565,7 @@ static bool tomoyo_file_matches_to_pattern2(const char *filename, } /** - * tomoyo_file_matches_to_pattern - Pattern matching without without '/' character. + * tomoyo_file_matches_pattern - Pattern matching without without '/' character. * * @filename: The start of string to check. * @filename_end: The end of string to check. @@ -576,7 +574,7 @@ static bool tomoyo_file_matches_to_pattern2(const char *filename, * * Returns true if @filename matches @pattern, false otherwise. */ -static bool tomoyo_file_matches_to_pattern(const char *filename, +static bool tomoyo_file_matches_pattern(const char *filename, const char *filename_end, const char *pattern, const char *pattern_end) @@ -589,10 +587,10 @@ static bool tomoyo_file_matches_to_pattern(const char *filename, /* Split at "\-" pattern. */ if (*pattern++ != '\\' || *pattern++ != '-') continue; - result = tomoyo_file_matches_to_pattern2(filename, - filename_end, - pattern_start, - pattern - 2); + result = tomoyo_file_matches_pattern2(filename, + filename_end, + pattern_start, + pattern - 2); if (first) result = !result; if (result) @@ -600,13 +598,79 @@ static bool tomoyo_file_matches_to_pattern(const char *filename, first = false; pattern_start = pattern; } - result = tomoyo_file_matches_to_pattern2(filename, filename_end, - pattern_start, pattern_end); + result = tomoyo_file_matches_pattern2(filename, filename_end, + pattern_start, pattern_end); return first ? result : !result; } /** + * tomoyo_path_matches_pattern2 - Do pathname pattern matching. + * + * @f: The start of string to check. + * @p: The start of pattern to compare. + * + * Returns true if @f matches @p, false otherwise. + */ +static bool tomoyo_path_matches_pattern2(const char *f, const char *p) +{ + const char *f_delimiter; + const char *p_delimiter; + + while (*f && *p) { + f_delimiter = strchr(f, '/'); + if (!f_delimiter) + f_delimiter = f + strlen(f); + p_delimiter = strchr(p, '/'); + if (!p_delimiter) + p_delimiter = p + strlen(p); + if (*p == '\\' && *(p + 1) == '{') + goto recursive; + if (!tomoyo_file_matches_pattern(f, f_delimiter, p, + p_delimiter)) + return false; + f = f_delimiter; + if (*f) + f++; + p = p_delimiter; + if (*p) + p++; + } + /* Ignore trailing "\*" and "\@" in @pattern. */ + while (*p == '\\' && + (*(p + 1) == '*' || *(p + 1) == '@')) + p += 2; + return !*f && !*p; + recursive: + /* + * The "\{" pattern is permitted only after '/' character. + * This guarantees that below "*(p - 1)" is safe. + * Also, the "\}" pattern is permitted only before '/' character + * so that "\{" + "\}" pair will not break the "\-" operator. + */ + if (*(p - 1) != '/' || p_delimiter <= p + 3 || *p_delimiter != '/' || + *(p_delimiter - 1) != '}' || *(p_delimiter - 2) != '\\') + return false; /* Bad pattern. */ + do { + /* Compare current component with pattern. */ + if (!tomoyo_file_matches_pattern(f, f_delimiter, p + 2, + p_delimiter - 2)) + break; + /* Proceed to next component. */ + f = f_delimiter; + if (!*f) + break; + f++; + /* Continue comparison. */ + if (tomoyo_path_matches_pattern2(f, p_delimiter + 1)) + return true; + f_delimiter = strchr(f, '/'); + } while (f_delimiter); + return false; /* Not matched. */ +} + +/** * tomoyo_path_matches_pattern - Check whether the given filename matches the given pattern. + * * @filename: The filename to check. * @pattern: The pattern to compare. * @@ -615,24 +679,24 @@ static bool tomoyo_file_matches_to_pattern(const char *filename, * The following patterns are available. * \\ \ itself. * \ooo Octal representation of a byte. - * \* More than or equals to 0 character other than '/'. - * \@ More than or equals to 0 character other than '/' or '.'. + * \* Zero or more repetitions of characters other than '/'. + * \@ Zero or more repetitions of characters other than '/' or '.'. * \? 1 byte character other than '/'. - * \$ More than or equals to 1 decimal digit. + * \$ One or more repetitions of decimal digits. * \+ 1 decimal digit. - * \X More than or equals to 1 hexadecimal digit. + * \X One or more repetitions of hexadecimal digits. * \x 1 hexadecimal digit. - * \A More than or equals to 1 alphabet character. + * \A One or more repetitions of alphabet characters. * \a 1 alphabet character. + * * \- Subtraction operator. + * + * /\{dir\}/ '/' + 'One or more repetitions of dir/' (e.g. /dir/ /dir/dir/ + * /dir/dir/dir/ ). */ bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, const struct tomoyo_path_info *pattern) { - /* - if (!filename || !pattern) - return false; - */ const char *f = filename->name; const char *p = pattern->name; const int len = pattern->const_len; @@ -640,37 +704,15 @@ bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, /* If @pattern doesn't contain pattern, I can use strcmp(). */ if (!pattern->is_patterned) return !tomoyo_pathcmp(filename, pattern); - /* Dont compare if the number of '/' differs. */ - if (filename->depth != pattern->depth) + /* Don't compare directory and non-directory. */ + if (filename->is_dir != pattern->is_dir) return false; /* Compare the initial length without patterns. */ if (strncmp(f, p, len)) return false; f += len; p += len; - /* Main loop. Compare each directory component. */ - while (*f && *p) { - const char *f_delimiter = strchr(f, '/'); - const char *p_delimiter = strchr(p, '/'); - if (!f_delimiter) - f_delimiter = f + strlen(f); - if (!p_delimiter) - p_delimiter = p + strlen(p); - if (!tomoyo_file_matches_to_pattern(f, f_delimiter, - p, p_delimiter)) - return false; - f = f_delimiter; - if (*f) - f++; - p = p_delimiter; - if (*p) - p++; - } - /* Ignore trailing "\*" and "\@" in @pattern. */ - while (*p == '\\' && - (*(p + 1) == '*' || *(p + 1) == '@')) - p += 2; - return !*f && !*p; + return tomoyo_path_matches_pattern2(f, p); } /** diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 31df541..92169d2 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -56,9 +56,6 @@ struct tomoyo_page_buffer { * (5) "is_patterned" is a bool which is true if "name" contains wildcard * characters, false otherwise. This allows TOMOYO to use "hash" and * strcmp() for string comparison if "is_patterned" is false. - * (6) "depth" is calculated using the number of "/" characters in "name". - * This allows TOMOYO to avoid comparing two pathnames which never match - * (e.g. whether "/var/www/html/index.html" matches "/tmp/sh-thd-\$"). */ struct tomoyo_path_info { const char *name; @@ -66,7 +63,6 @@ struct tomoyo_path_info { u16 const_len; /* = tomoyo_const_part_length(name) */ bool is_dir; /* = tomoyo_strendswith(name, "/") */ bool is_patterned; /* = tomoyo_path_contains_pattern(name) */ - u16 depth; /* = tomoyo_path_depth(name) */ }; /* -- cgit v0.10.2 From 0d0bea5ea4a0e91feff22ac5e32e14ff3a682247 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:14:58 -0600 Subject: perf tools: Add 'signed' flag setting back into trace-event-parse.c Commit 13999e59343b042b0807be2df6ae5895d29782a0 (perf tools: Handle the case with and without the "signed" trace field) removed code to set the FIELD_IS_SIGNED flag that was originally added by commit 26a50744b21fff65bd754874072857bee8967f4d (tracing/events: Add 'signed' field to format files). This adds it back. Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1259133299-23594-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index eae5605..7021dc1 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -941,7 +941,8 @@ static int event_read_fields(struct event *event, struct format_field **fields) if (read_expect_type(EVENT_ITEM, &token)) goto fail; - /* add signed type */ + if (strtoul(token, NULL, 0)) + field->flags |= FIELD_IS_SIGNED; free_token(token); if (read_expected(EVENT_OP, ";") < 0) -- cgit v0.10.2 From 99df5a6a215f026e62287083de2b44b22edd3623 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:14:59 -0600 Subject: trace/syscalls: Change ret param in struct syscall_trace_exit to long Commit ee949a86b3aef15845ea677aa60231008de62672 ("tracing/syscalls: Use long for syscall ret format and field definitions") changed the syscall exit return type to long, but forgot to change it in the struct. Signed-off-by: Tom Zanussi Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1259133299-23594-3-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4da6ede..1d7f483 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -100,7 +100,7 @@ struct syscall_trace_enter { struct syscall_trace_exit { struct trace_entry ent; int nr; - unsigned long ret; + long ret; }; struct kprobe_trace_entry { -- cgit v0.10.2 From 273bee27fa9f79d94b78c83506016f2e41e78983 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 25 Nov 2009 08:46:28 +0900 Subject: x86: Fix iommu=soft boot option iommu=soft boot option forces the kernel to use swiotlb. ( This has the side-effect of enabling the swiotlb over the GART if this boot option is provided. This is the desired behavior of the swiotlb boot option and works like that for all other hw-IOMMU drivers. ) Signed-off-by: FUJITA Tomonori Cc: yinghai@kernel.org LKML-Reference: <20091125084611O.fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index e36e71d..e3c0a66 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -50,6 +50,8 @@ static struct dma_map_ops swiotlb_dma_ops = { */ int __init pci_swiotlb_init(void) { + int use_swiotlb = swiotlb | swiotlb_force; + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ #ifdef CONFIG_X86_64 if (!no_iommu && max_pfn > MAX_DMA32_PFN) @@ -63,5 +65,5 @@ int __init pci_swiotlb_init(void) dma_ops = &swiotlb_dma_ops; } - return swiotlb_force; + return use_swiotlb; } -- cgit v0.10.2 From 93335a21557e80f6a99bc2812c634e488139043c Mon Sep 17 00:00:00 2001 From: Shmulik Ladkani Date: Wed, 25 Nov 2009 15:23:41 +0200 Subject: sched.c: Call debug_show_all_locks() when dumping all tasks In commit v2.6.21-691-g39bc89f ("make SysRq-T show all tasks again") the interface of show_state_filter() was changed: zero valued 'state_filter' specifies "dump all tasks" (instead of -1). However, the condition for calling debug_show_all_locks() ("show locks if all tasks are dumped") was not updated accordingly. Signed-off-by: Shmulik Ladkani Cc: peterz@infradead.org LKML-Reference: <4b0d2fe4.0ab6660a.6437.3cfc@mx.google.com> Signed-off-by: Ingo Molnar diff --git a/kernel/sched.c b/kernel/sched.c index 010d5e1..a57c6ae 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6915,7 +6915,7 @@ void show_state_filter(unsigned long state_filter) /* * Only show locks if all tasks are dumped: */ - if (state_filter == -1) + if (!state_filter) debug_show_all_locks(); } -- cgit v0.10.2 From 28b4e0d86acf59ae3bc422921138a4958458326e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Nov 2009 22:24:44 +0900 Subject: x86: Rename global percpu symbol dr7 to cpu_dr7 Percpu symbols now occupy the same namespace as other global symbols and as such short global symbols without subsystem prefix tend to collide with local variables. dr7 percpu variable used by x86 was hit by this. Rename it to cpu_dr7. The rename also makes it more consistent with its fellow cpu_debugreg percpu variable. Signed-off-by: Tejun Heo Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Rusty Russell Cc: Christoph Lameter Cc: Linus Torvalds , Cc: Andrew Morton LKML-Reference: <20091125115856.GA17856@elte.hu> Signed-off-by: Ingo Molnar Reported-by: Stephen Rothwell diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index fdabd84..8240f76 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,7 +75,7 @@ */ #ifdef __KERNEL__ -DECLARE_PER_CPU(unsigned long, dr7); +DECLARE_PER_CPU(unsigned long, cpu_dr7); static inline void hw_breakpoint_disable(void) { @@ -91,7 +91,7 @@ static inline void hw_breakpoint_disable(void) static inline int hw_breakpoint_active(void) { - return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; + return __get_cpu_var(cpu_dr7) & DR_GLOBAL_ENABLE_MASK; } extern void aout_dump_debugregs(struct user *dump); diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 4d267fb..92ea5aa 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -46,8 +46,8 @@ #include /* Per cpu debug control register value */ -DEFINE_PER_CPU(unsigned long, dr7); -EXPORT_PER_CPU_SYMBOL(dr7); +DEFINE_PER_CPU(unsigned long, cpu_dr7); +EXPORT_PER_CPU_SYMBOL(cpu_dr7); /* Per cpu debug address registers values */ static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); @@ -118,7 +118,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) set_debugreg(info->address, i); __get_cpu_var(cpu_debugreg[i]) = info->address; - dr7 = &__get_cpu_var(dr7); + dr7 = &__get_cpu_var(cpu_dr7); *dr7 |= encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); @@ -153,7 +153,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) return; - dr7 = &__get_cpu_var(dr7); + dr7 = &__get_cpu_var(cpu_dr7); *dr7 &= ~encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); @@ -437,7 +437,7 @@ void hw_breakpoint_restore(void) set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); set_debugreg(current->thread.debugreg6, 6); - set_debugreg(__get_cpu_var(dr7), 7); + set_debugreg(__get_cpu_var(cpu_dr7), 7); } EXPORT_SYMBOL_GPL(hw_breakpoint_restore); -- cgit v0.10.2 From 7ac074340480018681a0d72b324d4487543bdc0e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 25 Nov 2009 13:22:21 -0500 Subject: ring-buffer-benchmark: Add parameters to set produce/consumer priorities Running the ring-buffer-benchmark's threads at the lowest priority may work well for keeping it in the background, but it is not appropriate for the benchmarks. This patch adds 4 parameters to the module: consumer_fifo consumer_nice producer_fifo producer_nice By default the consumer and producer still run at nice +19. If the *_fifo options are set, they will override the *_nice values. modprobe ring_buffer_benchmark consumer_nice=0 producer_fifo=10 The above will set the consumer thread to a nice value of 0, and the producer thread to a RT SCHED_FIFO priority of 10. Note, this patch also fixes a bug where calling set_user_nice on the consumer thread would oops the kernel when the parameter "disable_reader" is set. Signed-off-by: Steven Rostedt diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 3875d49..b2477ca 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -39,6 +39,24 @@ static int write_iteration = 50; module_param(write_iteration, uint, 0644); MODULE_PARM_DESC(write_iteration, "# of writes between timestamp readings"); +static int producer_nice = 19; +static int consumer_nice = 19; + +static int producer_fifo = -1; +static int consumer_fifo = -1; + +module_param(producer_nice, uint, 0644); +MODULE_PARM_DESC(producer_nice, "nice prio for producer"); + +module_param(consumer_nice, uint, 0644); +MODULE_PARM_DESC(consumer_nice, "nice prio for consumer"); + +module_param(producer_fifo, uint, 0644); +MODULE_PARM_DESC(producer_fifo, "fifo prio for producer"); + +module_param(consumer_fifo, uint, 0644); +MODULE_PARM_DESC(consumer_fifo, "fifo prio for consumer"); + static int read_events; static int kill_test; @@ -270,6 +288,27 @@ static void ring_buffer_producer(void) if (kill_test) trace_printk("ERROR!\n"); + + if (!disable_reader) { + if (consumer_fifo < 0) + trace_printk("Running Consumer at nice: %d\n", + consumer_nice); + else + trace_printk("Running Consumer at SCHED_FIFO %d\n", + consumer_fifo); + } + if (producer_fifo < 0) + trace_printk("Running Producer at nice: %d\n", + producer_nice); + else + trace_printk("Running Producer at SCHED_FIFO %d\n", + producer_fifo); + + /* Let the user know that the test is running at low priority */ + if (producer_fifo < 0 && consumer_fifo < 0 && + producer_nice == 19 && consumer_nice == 19) + trace_printk("WARNING!!! This test is running at lowest priority.\n"); + trace_printk("Time: %lld (usecs)\n", time); trace_printk("Overruns: %lld\n", overruns); if (disable_reader) @@ -402,8 +441,23 @@ static int __init ring_buffer_benchmark_init(void) /* * Run them as low-prio background tasks by default: */ - set_user_nice(consumer, 19); - set_user_nice(producer, 19); + if (!disable_reader) { + if (consumer_fifo >= 0) { + struct sched_param param = { + .sched_priority = consumer_fifo + }; + sched_setscheduler(consumer, SCHED_FIFO, ¶m); + } else + set_user_nice(consumer, consumer_nice); + } + + if (producer_fifo >= 0) { + struct sched_param param = { + .sched_priority = consumer_fifo + }; + sched_setscheduler(producer, SCHED_FIFO, ¶m); + } else + set_user_nice(producer, producer_nice); return 0; -- cgit v0.10.2 From b8007ef7422270864eae523cb38d7522a53a94d3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 Nov 2009 13:45:32 +0800 Subject: tracing: Separate raw syscall from syscall tracer The current syscall tracer mixes raw syscalls and real syscalls. echo 1 > events/syscalls/enable And we get these from the output: (XXXX insteads " grep-20914 [001] 588211.446347" .. etc) XXXX: sys_read(fd: 3, buf: 80609a8, count: 7000) XXXX: sys_enter: NR 3 (3, 80609a8, 7000, a, 1000, bfce8ef8) XXXX: sys_read -> 0x138 XXXX: sys_exit: NR 3 = 312 XXXX: sys_read(fd: 3, buf: 8060ae0, count: 7000) XXXX: sys_enter: NR 3 (3, 8060ae0, 7000, a, 1000, bfce8ef8) XXXX: sys_read -> 0x138 XXXX: sys_exit: NR 3 = 312 There are 2 drawbacks here. A) two almost identical records are saved in ringbuffer when a syscall enters or exits. (4 records for every syscall) This wastes precious space in the ring buffer. B) the lines including "sys_enter/sys_exit" produces hardly any useful information for the output (no labels). The user can use this method to prevent these drawbacks: echo 1 > events/syscalls/enable echo 0 > events/syscalls/sys_enter/enable echo 0 > events/syscalls/sys_exit/enable But this is not user friendly. So we separate raw syscall from syscall tracer. After this fix applied: syscall tracer's output (echo 1 > events/syscalls/enable): XXXX: sys_read(fd: 3, buf: bfe87d88, count: 200) XXXX: sys_read -> 0x200 XXXX: sys_fstat64(fd: 3, statbuf: bfe87c98) XXXX: sys_fstat64 -> 0x0 XXXX: sys_close(fd: 3) raw syscall tracer's output (echo 1 > events/raw_syscalls/enable): XXXX: sys_enter: NR 175 (0, bf92bf18, bf92bf98, 8, b748cff4, bf92bef8) XXXX: sys_exit: NR 175 = 0 XXXX: sys_enter: NR 175 (2, bf92bf98, 0, 8, b748cff4, bf92bef8) XXXX: sys_exit: NR 175 = 0 XXXX: sys_enter: NR 3 (9, bf927f9c, 4000, b77e2518, b77dce60, bf92bff8) Signed-off-by: Lai Jiangshan LKML-Reference: <4AEFC37C.5080609@cn.fujitsu.com> Signed-off-by: Steven Rostedt diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 397dff2..fb726ac 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM -#define TRACE_SYSTEM syscalls +#define TRACE_SYSTEM raw_syscalls +#define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_EVENTS_SYSCALLS_H -- cgit v0.10.2 From b803090615ccec669681ff85ce28671e7bfefa3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 08:17:31 +0100 Subject: x86: dumpstack: Clean up the x86_stack_ids[][] initalization and other details Make the initialization more readable, plus tidy up a few small visual details as well. No change in functionality. LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7..e0ed4c7 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include #include @@ -35,6 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!stack) { unsigned long dummy; + stack = &dummy; if (task && task != current) stack = (unsigned long *)task->thread.sp; @@ -57,8 +58,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, - data, NULL, &graph); + bp = print_context_stack(context, stack, bp, ops, data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) @@ -72,7 +72,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -156,4 +156,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6b..cfec478 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -10,26 +10,28 @@ #include #include #include +#include #include #include -#include #include #include "dumpstack.h" +#define N_EXCEPTION_STACKS_END \ + (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) static char x86_stack_ids[][8] = { - [DEBUG_STACK - 1] = "#DB", - [NMI_STACK - 1] = "NMI", - [DOUBLEFAULT_STACK - 1] = "#DF", - [STACKFAULT_STACK - 1] = "#SS", - [MCE_STACK - 1] = "#MC", + [ DEBUG_STACK-1 ] = "#DB", + [ NMI_STACK-1 ] = "NMI", + [ DOUBLEFAULT_STACK-1 ] = "#DF", + [ STACKFAULT_STACK-1 ] = "#SS", + [ MCE_STACK-1 ] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... - N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [ N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS_END ] = "#DB[?]" #endif - }; +}; int x86_is_stack_id(int id, char *name) { @@ -37,7 +39,7 @@ int x86_is_stack_id(int id, char *name) } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, - unsigned *usedp, char **idp) + unsigned *usedp, char **idp) { unsigned k; @@ -202,7 +204,7 @@ EXPORT_SYMBOL(dump_trace); void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *sp, unsigned long bp, char *log_lvl) + unsigned long *sp, unsigned long bp, char *log_lvl) { unsigned long *stack; int i; @@ -303,4 +305,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } - -- cgit v0.10.2 From 67f2de0bf9141dd9fe9189d0caaa28d7ad21a523 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 08:29:10 +0100 Subject: x86: dumpstack, 64-bit: Disable preemption when walking the IRQ/exception stacks This warning: [ 847.140022] rb_producer D 0000000000000000 5928 519 2 0x00000000 [ 847.203627] BUG: using smp_processor_id() in preemptible [00000000] code: khungtaskd/517 [ 847.207360] caller is show_stack_log_lvl+0x2e/0x241 [ 847.210364] Pid: 517, comm: khungtaskd Not tainted 2.6.32-rc8-tip+ #13761 [ 847.213395] Call Trace: [ 847.215847] [] debug_smp_processor_id+0x1f0/0x20a [ 847.216809] [] show_stack_log_lvl+0x2e/0x241 [ 847.220027] [] show_stack+0x1c/0x1e [ 847.223365] [] sched_show_task+0xe4/0xe9 [ 847.226694] [] check_hung_task+0x140/0x199 [ 847.230261] [] check_hung_uninterruptible_tasks+0x1b7/0x20f [ 847.233371] [] ? watchdog+0x0/0x50 [ 847.236683] [] watchdog+0x4e/0x50 [ 847.240034] [] kthread+0x97/0x9f [ 847.243372] [] child_rip+0xa/0x20 [ 847.246690] [] ? restore_args+0x0/0x30 [ 847.250019] [] ? _spin_lock+0xe/0x10 [ 847.253351] [] ? kthread+0x0/0x9f [ 847.256833] [] ? child_rip+0x0/0x20 Happens because on preempt-RCU, khungd calls show_stack() with preemption enabled. Make sure we are not preemptible while walking the IRQ and exception stacks on 64-bit. (32-bit stack dumping is preemption safe.) Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index cfec478..8e74093 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -206,19 +206,22 @@ void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl) { + unsigned long *irq_stack_end; + unsigned long *irq_stack; unsigned long *stack; + int cpu; int i; - const int cpu = smp_processor_id(); - unsigned long *irq_stack_end = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); - unsigned long *irq_stack = - (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); + + preempt_disable(); + cpu = smp_processor_id(); + + irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); + irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); /* - * debugging aid: "show_stack(NULL, NULL);" prints the - * back trace for this cpu. + * Debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu: */ - if (sp == NULL) { if (task) sp = (unsigned long *)task->thread.sp; @@ -242,6 +245,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + preempt_enable(); + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } -- cgit v0.10.2 From 091ad3658e3c76c5fb05f65bfb64a0246f8f31b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 09:04:55 +0100 Subject: events: Rename TRACE_EVENT_TEMPLATE() to DECLARE_EVENT_CLASS() It is not quite obvious at first sight what TRACE_EVENT_TEMPLATE does: does it define an event as well beyond defining a template? To clarify this, rename it to DECLARE_EVENT_CLASS, which follows the various 'DECLARE_*()' idioms we already have in the kernel: DECLARE_EVENT_CLASS(class) DEFINE_EVENT(class, event1) DEFINE_EVENT(class, event2) DEFINE_EVENT(class, event3) To complete this logic we should also rename TRACE_EVENT() to: DEFINE_SINGLE_EVENT(single_event) ... but in a more quiet moment of the kernel cycle. Cc: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7063383..f59604e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -280,7 +280,7 @@ static inline void tracepoint_synchronize_unregister(void) * TRACE_EVENT_FN to perform any (un)registration work. */ -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) #define DEFINE_EVENT(template, name, proto, args) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index 5d7d855..5acfb1e 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -71,7 +71,7 @@ #undef TRACE_EVENT #undef TRACE_EVENT_FN -#undef TRACE_EVENT_TEMPLATE +#undef DECLARE_EVENT_CLASS #undef DEFINE_EVENT #undef DEFINE_EVENT_PRINT #undef TRACE_HEADER_MULTI_READ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 238f74b..5ce7950 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -83,7 +83,7 @@ TRACE_EVENT(sched_wait_task, * (NOTE: the 'rq' argument is not used by generic trace events, * but used by the latency tracer plugin. ) */ -TRACE_EVENT_TEMPLATE(sched_wakeup_template, +DECLARE_EVENT_CLASS(sched_wakeup_template, TP_PROTO(struct rq *rq, struct task_struct *p, int success), @@ -197,7 +197,7 @@ TRACE_EVENT(sched_migrate_task, __entry->orig_cpu, __entry->dest_cpu) ); -TRACE_EVENT_TEMPLATE(sched_process_template, +DECLARE_EVENT_CLASS(sched_process_template, TP_PROTO(struct task_struct *p), @@ -316,7 +316,7 @@ TRACE_EVENT(sched_signal_send, * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ -TRACE_EVENT_TEMPLATE(sched_stat_template, +DECLARE_EVENT_CLASS(sched_stat_template, TP_PROTO(struct task_struct *tsk, u64 delay), diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b046177..2c9c073 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -19,17 +19,17 @@ #include /* - * TRACE_EVENT_TEMPLATE can be used to add a generic function + * DECLARE_EVENT_CLASS can be used to add a generic function * handlers for events. That is, if all events have the same * parameters and just have distinct trace points. * Each tracepoint can be defined with DEFINE_EVENT and that - * will map the TRACE_EVENT_TEMPLATE to the tracepoint. + * will map the DECLARE_EVENT_CLASS to the tracepoint. * * TRACE_EVENT is a one to one mapping between tracepoint and template. */ #undef TRACE_EVENT #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ - TRACE_EVENT_TEMPLATE(name, \ + DECLARE_EVENT_CLASS(name, \ PARAMS(proto), \ PARAMS(args), \ PARAMS(tstruct), \ @@ -56,8 +56,8 @@ #undef TP_STRUCT__entry #define TP_STRUCT__entry(args...) args -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(name, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \ struct ftrace_raw_##name { \ struct trace_entry ent; \ tstruct \ @@ -115,8 +115,8 @@ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ struct ftrace_data_offsets_##call { \ tstruct; \ }; @@ -203,8 +203,8 @@ #undef TP_perf_assign #define TP_perf_assign(args...) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_format_setup_##call(struct ftrace_event_call *unused, \ struct trace_seq *s) \ @@ -321,8 +321,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ftrace_print_symbols_seq(p, value, symbols); \ }) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static enum print_line_t \ ftrace_raw_output_id_##call(int event_id, const char *name, \ struct trace_iterator *iter, int flags) \ @@ -428,8 +428,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __string #define __string(item, src) __dynamic_array(char, item, -1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, func, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ @@ -480,8 +480,8 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __string #define __string(item, src) __dynamic_array(char, item, strlen(src) + 1) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline int ftrace_get_offsets_##call( \ struct ftrace_data_offsets_##call *__data_offsets, proto) \ { \ @@ -521,8 +521,8 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef DEFINE_EVENT #define DEFINE_EVENT(template, name, proto, args) \ @@ -681,8 +681,8 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ static void ftrace_raw_event_id_##call(struct ftrace_event_call *event_call, \ proto) \ @@ -764,8 +764,8 @@ static int ftrace_raw_init_event_##call(struct ftrace_event_call *unused)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -885,8 +885,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef __perf_count #define __perf_count(c) __count = (c) -#undef TRACE_EVENT_TEMPLATE -#define TRACE_EVENT_TEMPLATE(call, proto, args, tstruct, assign, print) \ +#undef DECLARE_EVENT_CLASS +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static void \ ftrace_profile_templ_##call(struct ftrace_event_call *event_call, \ proto) \ -- cgit v0.10.2 From 925684d6d589e40e41007edf47c69e729d911263 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:03:23 +0800 Subject: tracing: Convert module refcnt events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 29854 1980 128 31962 7cda kernel/module.o.old 28750 1980 128 30858 788a kernel/module.o Two events are converted: module_refcnt: module_get, module_put No change in functionality. Signed-off-by: Li Zefan Cc: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E283B.3010508@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/module.h b/include/trace/events/module.h index 84160fb..4b0f48b 100644 --- a/include/trace/events/module.h +++ b/include/trace/events/module.h @@ -51,7 +51,7 @@ TRACE_EVENT(module_free, TP_printk("%s", __get_str(name)) ); -TRACE_EVENT(module_get, +DECLARE_EVENT_CLASS(module_refcnt, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), @@ -73,26 +73,18 @@ TRACE_EVENT(module_get, __get_str(name), (void *)__entry->ip, __entry->refcnt) ); -TRACE_EVENT(module_put, +DEFINE_EVENT(module_refcnt, module_get, TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_ARGS(mod, ip, refcnt), + TP_ARGS(mod, ip, refcnt) +); - TP_STRUCT__entry( - __field( unsigned long, ip ) - __field( int, refcnt ) - __string( name, mod->name ) - ), +DEFINE_EVENT(module_refcnt, module_put, - TP_fast_assign( - __entry->ip = ip; - __entry->refcnt = refcnt; - __assign_str(name, mod->name); - ), + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), - TP_printk("%s call_site=%pf refcnt=%d", - __get_str(name), (void *)__entry->ip, __entry->refcnt) + TP_ARGS(mod, ip, refcnt) ); TRACE_EVENT(module_request, -- cgit v0.10.2 From 53d0422c2d10808fddb2c30859193bfea164c7e3 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:10 +0800 Subject: tracing: Convert some kmem events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 333987 69800 27228 431015 693a7 mm/built-in.o.old 330030 69800 27228 427058 68432 mm/built-in.o 8 events are converted: kmem_alloc: kmalloc, kmem_cache_alloc kmem_alloc_node: kmalloc_node, kmem_cache_alloc_node kmem_free: kfree, kmem_cache_free mm_page: mm_page_alloc_zone_locked, mm_page_pcpu_drain No change in functionality. Signed-off-by: Li Zefan Acked-by: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mel Gorman LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index eaf46bd..3adca0c 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -44,7 +44,7 @@ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ ) : "GFP_NOWAIT" -TRACE_EVENT(kmalloc, +DECLARE_EVENT_CLASS(kmem_alloc, TP_PROTO(unsigned long call_site, const void *ptr, @@ -78,41 +78,23 @@ TRACE_EVENT(kmalloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(kmem_cache_alloc, +DEFINE_EVENT(kmem_alloc, kmalloc, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - ), +DEFINE_EVENT(kmem_alloc, kmem_cache_alloc, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags)) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags) ); -TRACE_EVENT(kmalloc_node, +DECLARE_EVENT_CLASS(kmem_alloc_node, TP_PROTO(unsigned long call_site, const void *ptr, @@ -150,45 +132,25 @@ TRACE_EVENT(kmalloc_node, __entry->node) ); -TRACE_EVENT(kmem_cache_alloc_node, +DEFINE_EVENT(kmem_alloc_node, kmalloc_node, - TP_PROTO(unsigned long call_site, - const void *ptr, - size_t bytes_req, - size_t bytes_alloc, - gfp_t gfp_flags, - int node), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node), + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - __field( size_t, bytes_req ) - __field( size_t, bytes_alloc ) - __field( gfp_t, gfp_flags ) - __field( int, node ) - ), +DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - __entry->bytes_req = bytes_req; - __entry->bytes_alloc = bytes_alloc; - __entry->gfp_flags = gfp_flags; - __entry->node = node; - ), + TP_PROTO(unsigned long call_site, const void *ptr, + size_t bytes_req, size_t bytes_alloc, + gfp_t gfp_flags, int node), - TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d", - __entry->call_site, - __entry->ptr, - __entry->bytes_req, - __entry->bytes_alloc, - show_gfp_flags(__entry->gfp_flags), - __entry->node) + TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); -TRACE_EVENT(kfree, +DECLARE_EVENT_CLASS(kmem_free, TP_PROTO(unsigned long call_site, const void *ptr), @@ -207,23 +169,18 @@ TRACE_EVENT(kfree, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); -TRACE_EVENT(kmem_cache_free, +DEFINE_EVENT(kmem_free, kfree, TP_PROTO(unsigned long call_site, const void *ptr), - TP_ARGS(call_site, ptr), + TP_ARGS(call_site, ptr) +); - TP_STRUCT__entry( - __field( unsigned long, call_site ) - __field( const void *, ptr ) - ), +DEFINE_EVENT(kmem_free, kmem_cache_free, - TP_fast_assign( - __entry->call_site = call_site; - __entry->ptr = ptr; - ), + TP_PROTO(unsigned long call_site, const void *ptr), - TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) + TP_ARGS(call_site, ptr) ); TRACE_EVENT(mm_page_free_direct, @@ -299,7 +256,7 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); -TRACE_EVENT(mm_page_alloc_zone_locked, +DECLARE_EVENT_CLASS(mm_page, TP_PROTO(struct page *page, unsigned int order, int migratetype), @@ -325,29 +282,22 @@ TRACE_EVENT(mm_page_alloc_zone_locked, __entry->order == 0) ); -TRACE_EVENT(mm_page_pcpu_drain, +DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked, - TP_PROTO(struct page *page, int order, int migratetype), + TP_PROTO(struct page *page, unsigned int order, int migratetype), - TP_ARGS(page, order, migratetype), + TP_ARGS(page, order, migratetype) +); - TP_STRUCT__entry( - __field( struct page *, page ) - __field( int, order ) - __field( int, migratetype ) - ), +DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain, - TP_fast_assign( - __entry->page = page; - __entry->order = order; - __entry->migratetype = migratetype; - ), + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), TP_printk("page=%p pfn=%lu order=%d migratetype=%d", - __entry->page, - page_to_pfn(__entry->page), - __entry->order, - __entry->migratetype) + __entry->page, page_to_pfn(__entry->page), + __entry->order, __entry->migratetype) ); TRACE_EVENT(mm_page_alloc_extfrag, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2bc2ac6..bdb22f5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -48,12 +48,14 @@ #include #include #include -#include #include #include #include "internal.h" +#define CREATE_TRACE_POINTS +#include + /* * Array of node states. */ diff --git a/mm/util.c b/mm/util.c index 7c35ad9..15d1975 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,9 +6,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate -- cgit v0.10.2 From c467307c1a812c3150b27a68c2b2d3397bb40a4f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:04:31 +0800 Subject: tracing: Convert softirq events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 12781 952 36 13769 35c9 kernel/softirq.o.old 11981 952 32 12965 32a5 kernel/softirq.o Two events are converted: softirq: softirq_entry, softirq_exit No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E287F.4030708@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index dcfcd44..0e4cfb6 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -82,18 +82,7 @@ TRACE_EVENT(irq_handler_exit, __entry->irq, __entry->ret ? "handled" : "unhandled") ); -/** - * softirq_entry - called immediately before the softirq handler - * @h: pointer to struct softirq_action - * @vec: pointer to first struct softirq_action in softirq_vec array - * - * The @h parameter, contains a pointer to the struct softirq_action - * which has a pointer to the action handler that is called. By subtracting - * the @vec pointer from the @h pointer, we can determine the softirq - * number. Also, when used in combination with the softirq_exit tracepoint - * we can determine the softirq latency. - */ -TRACE_EVENT(softirq_entry, +DECLARE_EVENT_CLASS(softirq, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), @@ -112,6 +101,24 @@ TRACE_EVENT(softirq_entry, ); /** + * softirq_entry - called immediately before the softirq handler + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter, contains a pointer to the struct softirq_action + * which has a pointer to the action handler that is called. By subtracting + * the @vec pointer from the @h pointer, we can determine the softirq + * number. Also, when used in combination with the softirq_exit tracepoint + * we can determine the softirq latency. + */ +DEFINE_EVENT(softirq, softirq_entry, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + +/** * softirq_exit - called immediately after the softirq handler returns * @h: pointer to struct softirq_action * @vec: pointer to first struct softirq_action in softirq_vec array @@ -122,22 +129,11 @@ TRACE_EVENT(softirq_entry, * combination with the softirq_entry tracepoint we can determine the softirq * latency. */ -TRACE_EVENT(softirq_exit, +DEFINE_EVENT(softirq, softirq_exit, TP_PROTO(struct softirq_action *h, struct softirq_action *vec), - TP_ARGS(h, vec), - - TP_STRUCT__entry( - __field( int, vec ) - ), - - TP_fast_assign( - __entry->vec = (int)(h - vec); - ), - - TP_printk("vec=%d [action=%s]", __entry->vec, - show_softirq_name(__entry->vec)) + TP_ARGS(h, vec) ); #endif /* _TRACE_IRQ_H */ -- cgit v0.10.2 From 382ece710bf88b08440b598731361e5a47582b62 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:03 +0800 Subject: tracing: Convert some workqueue events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 13171 800 72 14043 36db kernel/workqueue.o.old 12243 800 68 13111 3337 kernel/workqueue.o Two events are converted: workqueue: workqueue_insertion, workqueue_execution No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E289F.5010104@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index e4612db..d6c9744 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(workqueue_insertion, +DECLARE_EVENT_CLASS(workqueue, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), @@ -30,26 +30,18 @@ TRACE_EVENT(workqueue_insertion, __entry->thread_pid, __entry->func) ); -TRACE_EVENT(workqueue_execution, +DEFINE_EVENT(workqueue, workqueue_insertion, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work), + TP_ARGS(wq_thread, work) +); - TP_STRUCT__entry( - __array(char, thread_comm, TASK_COMM_LEN) - __field(pid_t, thread_pid) - __field(work_func_t, func) - ), +DEFINE_EVENT(workqueue, workqueue_execution, - TP_fast_assign( - memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN); - __entry->thread_pid = wq_thread->pid; - __entry->func = work->func; - ), + TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_printk("thread=%s:%d func=%pf", __entry->thread_comm, - __entry->thread_pid, __entry->func) + TP_ARGS(wq_thread, work) ); /* Trace the creation of one workqueue thread on a cpu */ -- cgit v0.10.2 From 7703466b4c0a21b88d701882bef0d45bcb0a0281 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:05:38 +0800 Subject: tracing: Convert some power events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 4312 524 12 4848 12f0 kernel/trace/power-traces.o.old 3455 524 8 3987 f93 kernel/trace/power-traces.o Two events are converted: power: power_start, power_frequency No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: <4B0E28C2.1090906@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 9bb96e5..c4efe9b 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -16,7 +16,7 @@ enum { }; #endif -TRACE_EVENT(power_start, +DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state), @@ -35,42 +35,36 @@ TRACE_EVENT(power_start, TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) ); -TRACE_EVENT(power_end, - - TP_PROTO(int dummy), +DEFINE_EVENT(power, power_start, - TP_ARGS(dummy), + TP_PROTO(unsigned int type, unsigned int state), - TP_STRUCT__entry( - __field( u64, dummy ) - ), + TP_ARGS(type, state) +); - TP_fast_assign( - __entry->dummy = 0xffff; - ), +DEFINE_EVENT(power, power_frequency, - TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + TP_PROTO(unsigned int type, unsigned int state), + TP_ARGS(type, state) ); +TRACE_EVENT(power_end, -TRACE_EVENT(power_frequency, - - TP_PROTO(unsigned int type, unsigned int state), + TP_PROTO(int dummy), - TP_ARGS(type, state), + TP_ARGS(dummy), TP_STRUCT__entry( - __field( u64, type ) - __field( u64, state ) + __field( u64, dummy ) ), TP_fast_assign( - __entry->type = type; - __entry->state = state; + __entry->dummy = 0xffff; ), - TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) + TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + ); #endif /* _TRACE_POWER_H */ -- cgit v0.10.2 From 77ca1e0294f25fc26053ba14353e703158acef26 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:14 +0800 Subject: tracing: Convert some block events to DEFINE_EVENT use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 53570 3284 184 57038 dece block/blk-core.o.old 43702 3284 144 47130 b81a block/blk-core.o 12 events are converted: block_rq: block_rq_insert, block_rq_issue block_rq_with_error: block_rq_{abort, requeue, complete} block_bio: block_bio_{backmerge, frontmerge, queue} block_get_rq: block_getrq, block_sleeprq block_unplug: block_unplug_timer, block_unplug_io No change in functionality. Signed-off-by: Li Zefan Cc: Jens Axboe Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E28E6.7060609@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 00405b5..5fb7273 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,7 +8,7 @@ #include #include -TRACE_EVENT(block_rq_abort, +DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -40,41 +40,28 @@ TRACE_EVENT(block_rq_abort, __entry->nr_sector, __entry->errors) ); -TRACE_EVENT(block_rq_insert, +DEFINE_EVENT(block_rq_with_error, block_rq_abort, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), + TP_ARGS(q, rq) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), +DEFINE_EVENT(block_rq_with_error, block_rq_requeue, - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->bytes = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0; + TP_PROTO(struct request_queue *q, struct request *rq), - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_ARGS(q, rq) +); - TP_printk("%d,%d %s %u (%s) %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __entry->bytes, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) +DEFINE_EVENT(block_rq_with_error, block_rq_complete, + + TP_PROTO(struct request_queue *q, struct request *rq), + + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_issue, +DECLARE_EVENT_CLASS(block_rq, TP_PROTO(struct request_queue *q, struct request *rq), @@ -86,7 +73,7 @@ TRACE_EVENT(block_rq_issue, __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) + __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -108,68 +95,18 @@ TRACE_EVENT(block_rq_issue, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_rq_requeue, +DEFINE_EVENT(block_rq, block_rq_insert, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); -TRACE_EVENT(block_rq_complete, +DEFINE_EVENT(block_rq, block_rq_issue, TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __field( int, errors ) - __array( char, rwbs, 6 ) - __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) - ), - - TP_fast_assign( - __entry->dev = rq->rq_disk ? disk_devt(rq->rq_disk) : 0; - __entry->sector = blk_pc_request(rq) ? 0 : blk_rq_pos(rq); - __entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq); - __entry->errors = rq->errors; - - blk_fill_rwbs_rq(__entry->rwbs, rq); - blk_dump_cmd(__get_str(cmd), rq); - ), - - TP_printk("%d,%d %s (%s) %llu + %u [%d]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->rwbs, __get_str(cmd), - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->errors) + TP_ARGS(q, rq) ); TRACE_EVENT(block_bio_bounce, @@ -228,7 +165,7 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -TRACE_EVENT(block_bio_backmerge, +DECLARE_EVENT_CLASS(block_bio, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -256,63 +193,28 @@ TRACE_EVENT(block_bio_backmerge, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_bio_frontmerge, +DEFINE_EVENT(block_bio, block_bio_backmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), - - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), - - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_bio_queue, +DEFINE_EVENT(block_bio, block_bio_frontmerge, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, bio) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_bio, block_bio_queue, - TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; - __entry->sector = bio->bi_sector; - __entry->nr_sector = bio->bi_size >> 9; - blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio) ); -TRACE_EVENT(block_getrq, +DECLARE_EVENT_CLASS(block_get_rq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), @@ -341,33 +243,18 @@ TRACE_EVENT(block_getrq, __entry->nr_sector, __entry->comm) ); -TRACE_EVENT(block_sleeprq, +DEFINE_EVENT(block_get_rq, block_getrq, TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw), + TP_ARGS(q, bio, rw) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( sector_t, sector ) - __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_get_rq, block_sleeprq, - TP_fast_assign( - __entry->dev = bio ? bio->bi_bdev->bd_dev : 0; - __entry->sector = bio ? bio->bi_sector : 0; - __entry->nr_sector = bio ? bio->bi_size >> 9 : 0; - blk_fill_rwbs(__entry->rwbs, - bio ? bio->bi_rw : 0, __entry->nr_sector); - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_printk("%d,%d %s %llu + %u [%s]", - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, - (unsigned long long)__entry->sector, - __entry->nr_sector, __entry->comm) + TP_ARGS(q, bio, rw) ); TRACE_EVENT(block_plug, @@ -387,7 +274,7 @@ TRACE_EVENT(block_plug, TP_printk("[%s]", __entry->comm) ); -TRACE_EVENT(block_unplug_timer, +DECLARE_EVENT_CLASS(block_unplug, TP_PROTO(struct request_queue *q), @@ -406,23 +293,18 @@ TRACE_EVENT(block_unplug_timer, TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) ); -TRACE_EVENT(block_unplug_io, +DEFINE_EVENT(block_unplug, block_unplug_timer, TP_PROTO(struct request_queue *q), - TP_ARGS(q), + TP_ARGS(q) +); - TP_STRUCT__entry( - __field( int, nr_rq ) - __array( char, comm, TASK_COMM_LEN ) - ), +DEFINE_EVENT(block_unplug, block_unplug_io, - TP_fast_assign( - __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE]; - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - ), + TP_PROTO(struct request_queue *q), - TP_printk("[%s] %d", __entry->comm, __entry->nr_rq) + TP_ARGS(q) ); TRACE_EVENT(block_split, -- cgit v0.10.2 From 071688f36e7eba3e37b2fc48e35bfdab99b80b4d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:06:55 +0800 Subject: tracing: Convert some jbd2 events to DEFINE_EVENT Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 34903 1693 448 37044 90b4 fs/jbd2/journal.o.old 31931 1693 416 34040 84f8 fs/jbd2/journal.o Four events are converted: jbd2_commit: jbd2_start_commit, jbd2_commit_{locking, flushing, logging} No change in functionality. Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E290F.7030909@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 3c60b75..96b370a 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -30,7 +30,7 @@ TRACE_EVENT(jbd2_checkpoint, jbd2_dev_to_name(__entry->dev), __entry->result) ); -TRACE_EVENT(jbd2_start_commit, +DECLARE_EVENT_CLASS(jbd2_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), @@ -53,73 +53,32 @@ TRACE_EVENT(jbd2_start_commit, __entry->sync_commit) ); -TRACE_EVENT(jbd2_commit_locking, +DEFINE_EVENT(jbd2_commit, jbd2_start_commit, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_flushing, +DEFINE_EVENT(jbd2_commit, jbd2_commit_locking, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); -TRACE_EVENT(jbd2_commit_logging, +DEFINE_EVENT(jbd2_commit, jbd2_commit_flushing, TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_ARGS(journal, commit_transaction), + TP_ARGS(journal, commit_transaction) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( char, sync_commit ) - __field( int, transaction ) - ), +DEFINE_EVENT(jbd2_commit, jbd2_commit_logging, - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->sync_commit = commit_transaction->t_synchronous_commit; - __entry->transaction = commit_transaction->t_tid; - ), + TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - TP_printk("dev %s transaction %d sync %d", - jbd2_dev_to_name(__entry->dev), __entry->transaction, - __entry->sync_commit) + TP_ARGS(journal, commit_transaction) ); TRACE_EVENT(jbd2_end_commit, -- cgit v0.10.2 From b5eb34c3592545c756e50d882c08417eb60740a7 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:07:36 +0800 Subject: tracing: Convert some ext4 events to DEFINE_TRACE Use DECLARE_EVENT_CLASS to remove duplicate code: text data bss dec hex filename 294695 6104 340 301139 49853 fs/ext4/ext4.o.old 289983 6104 324 296411 485db fs/ext4/ext4.o 5 events are convertd: ext4__write_begin: ext4_write_begin, ext4_da_write_begin ext4__write_end: ext4_{ordered, writeback, journalled}_write_end No change in functionality. Signed-off-by: Li Zefan Cc: Theodore Ts'o Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2938.2040708@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index d09550b..318f765 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -90,7 +90,7 @@ TRACE_EVENT(ext4_allocate_inode, (unsigned long) __entry->dir, __entry->mode) ); -TRACE_EVENT(ext4_write_begin, +DECLARE_EVENT_CLASS(ext4__write_begin, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int flags), @@ -118,7 +118,23 @@ TRACE_EVENT(ext4_write_begin, __entry->pos, __entry->len, __entry->flags) ); -TRACE_EVENT(ext4_ordered_write_end, +DEFINE_EVENT(ext4__write_begin, ext4_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DEFINE_EVENT(ext4__write_begin, ext4_da_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags) +); + +DECLARE_EVENT_CLASS(ext4__write_end, TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), @@ -145,57 +161,36 @@ TRACE_EVENT(ext4_ordered_write_end, __entry->pos, __entry->len, __entry->copied) ); -TRACE_EVENT(ext4_writeback_write_end, +DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), + TP_ARGS(inode, pos, len, copied) +); - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), +DEFINE_EVENT(ext4__write_end, ext4_writeback_write_end, - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_ARGS(inode, pos, len, copied) ); -TRACE_EVENT(ext4_journalled_write_end, +DEFINE_EVENT(ext4__write_end, ext4_journalled_write_end, + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, unsigned int copied), - TP_ARGS(inode, pos, len, copied), - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), + TP_ARGS(inode, pos, len, copied) +); - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), +DEFINE_EVENT(ext4__write_end, ext4_da_write_end, - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied) ); TRACE_EVENT(ext4_writepage, @@ -337,60 +332,6 @@ TRACE_EVENT(ext4_da_writepages_result, (unsigned long) __entry->writeback_index) ); -TRACE_EVENT(ext4_da_write_begin, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int flags), - - TP_ARGS(inode, pos, len, flags), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->flags = flags; - ), - - TP_printk("dev %s ino %lu pos %llu len %u flags %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->flags) -); - -TRACE_EVENT(ext4_da_write_end, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), - - TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, - __entry->pos, __entry->len, __entry->copied) -); - TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), -- cgit v0.10.2 From 470dda7417f284b9cfc96560b2acd98df63798a2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 26 Nov 2009 15:08:01 +0800 Subject: tracing: Restore original format of sched events The original format for sched_stat_iowait and sched_stat_sleep: $ cat events/sched/sched_stat_iowait/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format ... print fmt: "comm=%s pid=%d delay=%Lu [ns]", ... But commit commit 75ec29ab848a7e92a41aaafaeb33d1afbc839be4 ("tracing: Convert some sched trace events to DEFINE_EVENT and _PRINT") broke the format: $ cat events/sched/sched_stat_iowait/format print fmt: "task: %s:%d iowait: %Lu [ns]", ... $ cat events/sched/sched_stat_sleep/format print fmt: "task: %s:%d sleep: %Lu [ns]", ... No change in functionality. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0E2951.9050800@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 5ce7950..9d316b2 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -355,23 +355,17 @@ DEFINE_EVENT(sched_stat_template, sched_stat_wait, * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_sleep, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d sleep: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_sleep, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting iowait time (time the task is not runnable * due to waiting on IO to complete). */ -DEFINE_EVENT_PRINT(sched_stat_template, sched_stat_iowait, - TP_PROTO(struct task_struct *tsk, u64 delay), - TP_ARGS(tsk, delay), - TP_printk("task: %s:%d iowait: %Lu [ns]", - __entry->comm, __entry->pid, - (unsigned long long)__entry->delay)); +DEFINE_EVENT(sched_stat_template, sched_stat_iowait, + TP_PROTO(struct task_struct *tsk, u64 delay), + TP_ARGS(tsk, delay)); /* * Tracepoint for accounting runtime (time the task is executing -- cgit v0.10.2 From d99be40aff88722ab03ee295e4f6c13a4cca9a3d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 26 Nov 2009 05:35:40 +0100 Subject: ksym_tracer: Fix breakpoint removal after modification The error path of a breakpoint modification is broken in the ksym tracer. A modified breakpoint hlist node is immediately released after its removal. Also we leak a breakpoint in this case. Fix the path. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Prasad LKML-Reference: <1259210142-5714-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 11935b5..9f040e4 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -339,14 +339,20 @@ static ssize_t ksym_trace_filter_write(struct file *file, ksym_hbp_handler, true); if (IS_ERR(entry->ksym_hbp)) entry->ksym_hbp = NULL; - if (!entry->ksym_hbp) + + /* modified without problem */ + if (entry->ksym_hbp) { + ret = 0; goto out; + } + } else { + ret = 0; } + /* Error or "symbol:---" case: drop it */ ksym_filter_entry_count--; hlist_del_rcu(&(entry->ksym_hlist)); synchronize_rcu(); kfree(entry); - ret = 0; goto out; } else { /* Check for malformed request: (4) */ -- cgit v0.10.2 From c6567f642e20bcc79abed030f44be5b0d6da2ded Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 26 Nov 2009 05:35:41 +0100 Subject: hw-breakpoints: Improve in-kernel event creation error granularity In fail case, perf_event_create_kernel_counter() returns NULL instead of an error, which doesn't help us to inform the user about the origin of the problem from the outer most callers. Often we can just return -EINVAL, which doesn't help anyone when it's eventually about a memory allocation failure. Then, this patch makes perf_event_create_kernel_counter() always return a detailed error code. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Prasad LKML-Reference: <1259210142-5714-2-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 35df94e..34a1b9d7 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4780,14 +4780,17 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, */ ctx = find_get_context(pid, cpu); - if (IS_ERR(ctx)) - return NULL; + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_exit; + } event = perf_event_alloc(attr, cpu, ctx, NULL, NULL, callback, GFP_KERNEL); - err = PTR_ERR(event); - if (IS_ERR(event)) + if (IS_ERR(event)) { + err = PTR_ERR(event); goto err_put_context; + } event->filp = NULL; WARN_ON_ONCE(ctx->parent_ctx); @@ -4804,11 +4807,10 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, return event; -err_put_context: - if (err < 0) - put_ctx(ctx); - - return NULL; + err_put_context: + put_ctx(ctx); + err_exit: + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); -- cgit v0.10.2 From 605bfaee9078cd0b01d83402315389839ee4bb5c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 26 Nov 2009 05:35:42 +0100 Subject: hw-breakpoints: Simplify error handling in breakpoint creation requests This simplifies the error handling when we create a breakpoint. We don't need to check the NULL return value corner case anymore since we have improved perf_event_create_kernel_counter() to always return an error code in the failure case. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Steven Rostedt Cc: Prasad LKML-Reference: <1259210142-5714-3-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b25f894..75e0cd84 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -657,10 +657,7 @@ restore: tsk, true); thread->ptrace_bps[i] = NULL; - if (!bp) { /* incorrect bp, or we have a bug in bp API */ - rc = -EINVAL; - break; - } + /* Incorrect bp, or we have a bug in bp API */ if (IS_ERR(bp)) { rc = PTR_ERR(bp); bp = NULL; @@ -729,9 +726,6 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, tsk, bp->attr.disabled); } - - if (!bp) - return -EIO; /* * CHECKME: the previous code returned -EIO if the addr wasn't a * valid task virtual addr. The new one will return -EINVAL in this diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 06d372f..dd3fb4a 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -442,7 +442,7 @@ register_wide_hw_breakpoint(unsigned long addr, *pevent = bp; - if (IS_ERR(bp) || !bp) { + if (IS_ERR(bp)) { err = PTR_ERR(bp); goto fail; } @@ -453,7 +453,7 @@ register_wide_hw_breakpoint(unsigned long addr, fail: for_each_possible_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - if (IS_ERR(*pevent) || !*pevent) + if (IS_ERR(*pevent)) break; unregister_hw_breakpoint(*pevent); } diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index 9f040e4..c538b15 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -200,12 +200,9 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, entry->len, entry->type, ksym_hbp_handler, true); + if (IS_ERR(entry->ksym_hbp)) { - entry->ksym_hbp = NULL; ret = PTR_ERR(entry->ksym_hbp); - } - - if (!entry->ksym_hbp) { printk(KERN_INFO "ksym_tracer request failed. Try again" " later!!\n"); goto err; @@ -332,21 +329,16 @@ static ssize_t ksym_trace_filter_write(struct file *file, if (changed) { unregister_wide_hw_breakpoint(entry->ksym_hbp); entry->type = op; + ret = 0; if (op > 0) { entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, entry->len, entry->type, ksym_hbp_handler, true); if (IS_ERR(entry->ksym_hbp)) - entry->ksym_hbp = NULL; - - /* modified without problem */ - if (entry->ksym_hbp) { - ret = 0; + ret = PTR_ERR(entry->ksym_hbp); + else goto out; - } - } else { - ret = 0; } /* Error or "symbol:---" case: drop it */ ksym_filter_entry_count--; diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index 9506381..ee7f9fb 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -61,9 +61,6 @@ static int __init hw_break_module_init(void) if (IS_ERR(sample_hbp)) { ret = PTR_ERR(sample_hbp); goto fail; - } else if (!sample_hbp) { - ret = -EINVAL; - goto fail; } printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name); -- cgit v0.10.2 From 2c31b7958fd21df9fa04e5c36cda0f063ac70b27 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 26 Nov 2009 06:04:38 +0100 Subject: x86/hw-breakpoints: Don't lose GE flag while disabling a breakpoint When we schedule out a breakpoint from the cpu, we also incidentally remove the "Global exact breakpoint" flag from the breakpoint control register. It makes us losing the fine grained precision about the origin of the instructions that may trigger breakpoint exceptions for the other breakpoints running in this cpu. Reported-by: Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259211878-6013-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 92ea5aa..d42f65a 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -59,22 +59,28 @@ static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); -/* - * Encode the length, type, Exact, and Enable bits for a particular breakpoint - * as stored in debug register 7. - */ -unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +static inline unsigned long +__encode_dr7(int drnum, unsigned int len, unsigned int type) { unsigned long bp_info; bp_info = (len | type) & 0xf; bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); - bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | - DR_GLOBAL_SLOWDOWN; + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); + return bp_info; } /* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; +} + +/* * Decode the length and type bits for a particular breakpoint as * stored in debug register 7. Return the "enabled" status. */ @@ -154,7 +160,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) return; dr7 = &__get_cpu_var(cpu_dr7); - *dr7 &= ~encode_dr7(i, info->len, info->type); + *dr7 &= ~__encode_dr7(i, info->len, info->type); set_debugreg(*dr7, 7); } -- cgit v0.10.2 From 11e6635763bdc0e24b39a38876574660755acffc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 25 Nov 2009 23:01:50 -0800 Subject: kernel/hw_breakpoint.c: Fix local/global shadowing If the new percpu tree is combined with the perf events tree the following new warning triggers: kernel/hw_breakpoint.c: In function 'toggle_bp_task_slot': kernel/hw_breakpoint.c:151: warning: 'task_bp_pinned' is used uninitialized in this function Because it's not valid anymore to define a local variable and a percpu variable (even if it's file scope local) with the same name. Rename the local variable to resolve this. Signed-off-by: Andrew Morton Cc: Frederic Weisbecker Cc: K.Prasad Cc: Tejun Heo Cc: Linus Torvalds LKML-Reference: <200911260701.nAQ71owx016356@imap1.linux-foundation.org> [ v2: added changelog ] Signed-off-by: Ingo Molnar diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index dd3fb4a..32e1018 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -121,7 +121,7 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) int count = 0; struct perf_event *bp; struct perf_event_context *ctx = tsk->perf_event_ctxp; - unsigned int *task_bp_pinned; + unsigned int *tsk_pinned; struct list_head *list; unsigned long flags; @@ -146,15 +146,15 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) return; - task_bp_pinned = per_cpu(task_bp_pinned, cpu); + tsk_pinned = per_cpu(task_bp_pinned, cpu); if (enable) { - task_bp_pinned[count]++; + tsk_pinned[count]++; if (count > 0) - task_bp_pinned[count-1]--; + tsk_pinned[count-1]--; } else { - task_bp_pinned[count]--; + tsk_pinned[count]--; if (count > 0) - task_bp_pinned[count-1]++; + tsk_pinned[count-1]++; } } -- cgit v0.10.2 From f6630114d9198aa959ac95c131334c020038f253 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 17 Nov 2009 18:22:15 -0600 Subject: sched: Limit the number of scheduler debug messages Remove the verbose scheduler debug messages unless kernel parameter "sched_debug" set. /proc/sched_debug unchanged. Signed-off-by: Mike Travis Cc: Heiko Carstens Cc: Roland Dreier Cc: Randy Dunlap Cc: Tejun Heo Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: Yinghai Lu Cc: David Rientjes Cc: Steven Rostedt Cc: Rusty Russell Cc: Hidetoshi Seto Cc: Jack Steiner Cc: Frederic Weisbecker LKML-Reference: <20091118002221.489305000@alcatraz.americas.sgi.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 9107b38..f2a9507 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2182,6 +2182,8 @@ and is between 256 and 4096 characters. It is defined in the file sbni= [NET] Granch SBNI12 leased line adapter + sched_debug [KNL] Enables verbose scheduler debug messages. + sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver Format: [,[,]] diff --git a/kernel/sched.c b/kernel/sched.c index a57c6ae..48ff66a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7720,6 +7720,16 @@ early_initcall(migration_init); #ifdef CONFIG_SCHED_DEBUG +static __read_mostly int sched_domain_debug_enabled; + +static int __init sched_domain_debug_setup(char *str) +{ + sched_domain_debug_enabled = 1; + + return 0; +} +early_param("sched_debug", sched_domain_debug_setup); + static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, struct cpumask *groupmask) { @@ -7806,6 +7816,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) cpumask_var_t groupmask; int level = 0; + if (!sched_domain_debug_enabled) + return; + if (!sd) { printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); return; -- cgit v0.10.2 From 9b3660a55a9052518c91cc7c62d89e22f3f6f490 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 17 Nov 2009 18:22:16 -0600 Subject: x86: Limit number of per cpu TSC sync messages Limit the number of per cpu TSC sync messages by only printing to the console if an error occurs, otherwise print as a DEBUG message. The info message "Skipping synchronization ..." is only printed after the last cpu has booted. Signed-off-by: Mike Travis Cc: Heiko Carstens Cc: Roland Dreier Cc: Randy Dunlap Cc: Tejun Heo Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: Yinghai Lu Cc: David Rientjes Cc: Steven Rostedt Cc: Rusty Russell Cc: Hidetoshi Seto Cc: Jack Steiner Cc: Frederic Weisbecker LKML-Reference: <20091118002222.181053000@alcatraz.americas.sgi.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f379309..eed1568 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) return; if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { - printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); + if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) + pr_info( + "Skipped synchronization checks as TSC is reliable.\n"); return; } - pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", - smp_processor_id(), cpu); - /* * Reset it - in case this is a second bootup: */ @@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu) cpu_relax(); if (nr_warps) { - printk("\n"); + pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", + smp_processor_id(), cpu); pr_warning("Measured %Ld cycles TSC warp between CPUs, " "turning off TSC clock.\n", max_warp); mark_tsc_unstable("check_tsc_sync_source failed"); } else { - printk(" passed.\n"); + pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", + smp_processor_id(), cpu); } /* -- cgit v0.10.2 From 767df1bdd8cbff2c8c40c9ac8295bbdaa5fb24c4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 17:29:02 +0900 Subject: x86, mce: Add __cpuinit to hotplug callback functions The mce_disable_cpu() and mce_reenable_cpu() are called only from mce_cpu_callback() which is marked as __cpuinit. So these functions can be __cpuinit too. Signed-off-by: Hidetoshi Seto Cc: Andi Kleen LKML-Reference: <4B0E3C4E.4090809@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 5f277ca..0bcaa38 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1953,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) } /* Make sure there are no machine checks on offlined CPUs. */ -static void mce_disable_cpu(void *h) +static void __cpuinit mce_disable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; if (!mce_available(¤t_cpu_data)) return; + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < banks; i++) { @@ -1970,7 +1971,7 @@ static void mce_disable_cpu(void *h) } } -static void mce_reenable_cpu(void *h) +static void __cpuinit mce_reenable_cpu(void *h) { unsigned long action = *(unsigned long *)h; int i; -- cgit v0.10.2 From 79b0379cee09b00ef309384aff652e328e438c79 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 25 Nov 2009 14:18:26 -0500 Subject: x86: Optimize loadsegment() Zero the input register in the exception handler instead of using an extra register to pass in a zero value. Signed-off-by: Brian Gerst LKML-Reference: <1259176706-5908-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 1a953e2..537395a 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -156,18 +156,19 @@ extern void native_load_gs_index(unsigned); * segment if something goes wrong.. */ #define loadsegment(seg, value) \ +do { \ + unsigned short __val = value; \ asm volatile("\n" \ "1:\t" \ "movl %k0,%%" #seg "\n" \ - "2:\n" \ ".section .fixup,\"ax\"\n" \ - "3:\t" \ - "movl %k1, %%" #seg "\n\t" \ - "jmp 2b\n" \ + "2:\t" \ + "xorl %k0,%k0\n\t" \ + "jmp 1b\n" \ ".previous\n" \ - _ASM_EXTABLE(1b,3b) \ - : :"r" (value), "r" (0) : "memory") - + _ASM_EXTABLE(1b, 2b) \ + : "+r" (__val) : : "memory"); \ +} while (0) /* * Save a segment register away -- cgit v0.10.2 From 64b028b22616946a05bf9580f7f7f7ee2ac070b4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 10:37:55 +0100 Subject: x86: Clean up the loadsegment() macro Make it readable in the source too, not just in the assembly output. No change in functionality. Cc: Brian Gerst LKML-Reference: <1259176706-5908-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 537395a..022a843 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -155,19 +155,21 @@ extern void native_load_gs_index(unsigned); * Load a segment. Fall back on loading the zero * segment if something goes wrong.. */ -#define loadsegment(seg, value) \ -do { \ - unsigned short __val = value; \ - asm volatile("\n" \ - "1:\t" \ - "movl %k0,%%" #seg "\n" \ - ".section .fixup,\"ax\"\n" \ - "2:\t" \ - "xorl %k0,%k0\n\t" \ - "jmp 1b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 2b) \ - : "+r" (__val) : : "memory"); \ +#define loadsegment(seg, value) \ +do { \ + unsigned short __val = (value); \ + \ + asm volatile(" \n" \ + "1: movl %k0,%%" #seg " \n" \ + \ + ".section .fixup,\"ax\" \n" \ + "2: xorl %k0,%k0 \n" \ + " jmp 1b \n" \ + ".previous \n" \ + \ + _ASM_EXTABLE(1b, 2b) \ + \ + : "+r" (__val) : : "memory"); \ } while (0) /* -- cgit v0.10.2 From 80bbf6b641c8843b9d751a1f299aa7ee073ab9d4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Nov 2009 21:20:53 +0100 Subject: hw-breakpoints: Fix unused function in off-case bp_perf_event_destroy() is unused in its off-case version, let's remove it to fix the following warning reported by Stephen Rothwell in linux-next: kernel/perf_event.c:4306: warning: 'bp_perf_event_destroy' defined but not used Reported-by: Stephen Rothwell Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra LKML-Reference: <1259180453-5813-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 34a1b9d7..f8c7939 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4303,10 +4303,6 @@ void perf_bp_event(struct perf_event *bp, void *data) perf_swevent_add(bp, 1, 1, &sample, regs); } #else -static void bp_perf_event_destroy(struct perf_event *event) -{ -} - static const struct pmu *bp_perf_event_init(struct perf_event *bp) { return NULL; -- cgit v0.10.2 From 8ec6993d9f7d961014af970ded57542961fe9ad9 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Wed, 25 Nov 2009 11:17:36 -0500 Subject: x86, 64-bit: Set data segments to null after switching to 64-bit mode This prevents kernel threads from inheriting non-null segment selectors, and causing optimizations in __switch_to() to be ineffective. Signed-off-by: Brian Gerst Cc: Tim Blechmann Cc: Linus Torvalds Cc: H. Peter Anvin Cc: Jeremy Fitzhardinge Cc: Jan Beulich LKML-Reference: <1259165856-3512-1-git-send-email-brgerst@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 2640660..17ba9ec 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -212,8 +212,8 @@ ENTRY(secondary_startup_64) */ lgdt early_gdt_descr(%rip) - /* set up data segments. actually 0 would do too */ - movl $__KERNEL_DS,%eax + /* set up data segments */ + xorl %eax,%eax movl %eax,%ds movl %eax,%ss movl %eax,%es -- cgit v0.10.2 From 918bc960dc630b1a79c0d2991a81985812ff69f5 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Wed, 25 Nov 2009 10:20:19 -0600 Subject: x86: SGI UV: Map low MMR ranges Explicitly mmap the UV chipset MMR address ranges used to access blade-local registers. Although these same MMRs are also mmaped at higher addresses, the low range is more convenient when accessing blade-local registers. The low range addresses always alias to the local blade regardless of the blade id. Signed-off-by: Jack Steiner LKML-Reference: <20091125162018.GA25445@sgi.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f5f5886..6d42549 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -409,6 +409,12 @@ static __init void map_mmioh_high(int max_pnode) map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); } +static __init void map_low_mmrs(void) +{ + init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); + init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); +} + static __init void uv_rtc_init(void) { long status; @@ -550,6 +556,8 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; + map_low_mmrs(); + m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; -- cgit v0.10.2 From d1eb650ff4130972fa21462fa49cd35a2865403b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:45 -0500 Subject: tracepoint: Move signal sending tracepoint to events/signal.h Move signal sending event to events/signal.h. This patch also renames sched_signal_send event to signal_generate. Changes in v4: - Fix a typo of task_struct pointer. Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Add siginfo storing macro Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215645.30449.60208.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl index b0756d0..8bca1d5 100644 --- a/Documentation/DocBook/tracepoint.tmpl +++ b/Documentation/DocBook/tracepoint.tmpl @@ -86,4 +86,9 @@ !Iinclude/trace/events/irq.h + + SIGNAL +!Iinclude/trace/events/signal.h + + diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9d316b2..cfceb0b 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -288,31 +288,6 @@ TRACE_EVENT(sched_process_fork, ); /* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - ), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ), - - TP_printk("sig=%d comm=%s pid=%d", - __entry->sig, __entry->comm, __entry->pid) -); - -/* * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE * adding sched_stat support to SCHED_FIFO/RR would be welcome. */ diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h new file mode 100644 index 0000000..ef51756 --- /dev/null +++ b/include/trace/events/signal.h @@ -0,0 +1,66 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM signal + +#if !defined(_TRACE_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_SIGNAL_H + +#include +#include +#include + +#define TP_STORE_SIGINFO(__entry, info) \ + do { \ + if (info == SEND_SIG_NOINFO) { \ + __entry->errno = 0; \ + __entry->code = SI_USER; \ + } else if (info == SEND_SIG_PRIV) { \ + __entry->errno = 0; \ + __entry->code = SI_KERNEL; \ + } else { \ + __entry->errno = info->si_errno; \ + __entry->code = info->si_code; \ + } \ + } while (0) + +/** + * signal_generate - called when a signal is generated + * @sig: signal number + * @info: pointer to struct siginfo + * @task: pointer to struct task_struct + * + * Current process sends a 'sig' signal to 'task' process with + * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, + * 'info' is not a pointer and you can't access its field. Instead, + * SEND_SIG_NOINFO means that si_code is SI_USER, and SEND_SIG_PRIV + * means that si_code is SI_KERNEL. + */ +TRACE_EVENT(signal_generate, + + TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), + + TP_ARGS(sig, info, task), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + ), + + TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", + __entry->sig, __entry->errno, __entry->code, + __entry->comm, __entry->pid) +); + +#endif /* _TRACE_SIGNAL_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/signal.c b/kernel/signal.c index 6705320..a1e0cc6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -27,7 +27,8 @@ #include #include #include -#include +#define CREATE_TRACE_POINTS +#include #include #include @@ -834,7 +835,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, struct sigqueue *q; int override_rlimit; - trace_sched_signal_send(sig, t); + trace_signal_generate(sig, info, t); assert_spin_locked(&t->sighand->siglock); -- cgit v0.10.2 From f9d4257e01d266e67420cc99d456b6d4c8464f54 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:51 -0500 Subject: tracepoint: Add signal deliver event Add a tracepoint where a process gets a signal. This tracepoint shows signal-number, sa-handler and sa-flag. Changes in v3: - Add docbook style comments Changes in v2: - Add siginfo argument - Fix comment Signed-off-by: Masami Hiramatsu Reviewed-by: Jason Baron Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215651.30449.20926.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index ef51756..a6d71de 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -60,6 +60,45 @@ TRACE_EVENT(signal_generate, __entry->comm, __entry->pid) ); +/** + * signal_deliver - called when a signal is delivered + * @sig: signal number + * @info: pointer to struct siginfo + * @ka: pointer to struct k_sigaction + * + * A 'sig' signal is delivered to current process with 'info' siginfo, + * and it will be handled by 'ka'. ka->sa.sa_handler can be SIG_IGN or + * SIG_DFL. + * Note that some signals reported by signal_generate tracepoint can be + * lost, ignored or modified (by debugger) before hitting this tracepoint. + * This means, this can show which signals are actually delivered, but + * matching generated signals and delivered signals may not be correct. + */ +TRACE_EVENT(signal_deliver, + + TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka), + + TP_ARGS(sig, info, ka), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, errno ) + __field( int, code ) + __field( unsigned long, sa_handler ) + __field( unsigned long, sa_flags ) + ), + + TP_fast_assign( + __entry->sig = sig; + TP_STORE_SIGINFO(__entry, info); + __entry->sa_handler = (unsigned long)ka->sa.sa_handler; + __entry->sa_flags = ka->sa.sa_flags; + ), + + TP_printk("sig=%d errno=%d code=%d sa_handler=%lx sa_flags=%lx", + __entry->sig, __entry->errno, __entry->code, + __entry->sa_handler, __entry->sa_flags) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/signal.c b/kernel/signal.c index a1e0cc6..349d449 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1840,6 +1840,9 @@ relock: ka = &sighand->action[signr-1]; } + /* Trace actually delivered signals. */ + trace_signal_deliver(signr, info, ka); + if (ka->sa.sa_handler == SIG_IGN) /* Do nothing. */ continue; if (ka->sa.sa_handler != SIG_DFL) { -- cgit v0.10.2 From ba005e1f417295d28cd1563ab82bc33af07fb16a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 24 Nov 2009 16:56:58 -0500 Subject: tracepoint: Add signal loss events Add signal_overflow_fail and signal_lose_info tracepoints for signal-lost events. Changes in v3: - Add docbook style comments Changes in v2: - Use siginfo string macro Suggested-by: Roland McGrath Reviewed-by: Jason Baron Signed-off-by: Masami Hiramatsu Acked-by: Roland McGrath Cc: systemtap Cc: DLE Cc: Oleg Nesterov LKML-Reference: <20091124215658.30449.9934.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index a6d71de..a510b75 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -99,6 +99,74 @@ TRACE_EVENT(signal_deliver, __entry->sig, __entry->errno, __entry->code, __entry->sa_handler, __entry->sa_flags) ); + +/** + * signal_overflow_fail - called when signal queue is overflow + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel fails to generate 'sig' signal with 'info' siginfo, because + * siginfo queue is overflow, and the signal is dropped. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of RT signals. + */ +TRACE_EVENT(signal_overflow_fail, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); + +/** + * signal_lose_info - called when siginfo is lost + * @sig: signal number + * @group: signal to process group or not (bool) + * @info: pointer to struct siginfo + * + * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo + * queue is overflow. + * 'group' is not 0 if the signal will be sent to a process group. + * 'sig' is always one of non-RT signals. + */ +TRACE_EVENT(signal_lose_info, + + TP_PROTO(int sig, int group, struct siginfo *info), + + TP_ARGS(sig, group, info), + + TP_STRUCT__entry( + __field( int, sig ) + __field( int, group ) + __field( int, errno ) + __field( int, code ) + ), + + TP_fast_assign( + __entry->sig = sig; + __entry->group = group; + TP_STORE_SIGINFO(__entry, info); + ), + + TP_printk("sig=%d group=%d errno=%d code=%d", + __entry->sig, __entry->group, __entry->errno, __entry->code) +); #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/signal.c b/kernel/signal.c index 349d449..93e72e5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -897,12 +897,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, break; } } else if (!is_si_special(info)) { - if (sig >= SIGRTMIN && info->si_code != SI_USER) - /* - * Queue overflow, abort. We may abort if the signal was rt - * and sent by user using something other than kill(). - */ + if (sig >= SIGRTMIN && info->si_code != SI_USER) { + /* + * Queue overflow, abort. We may abort if the + * signal was rt and sent by user using something + * other than kill(). + */ + trace_signal_overflow_fail(sig, group, info); return -EAGAIN; + } else { + /* + * This is a silent loss of information. We still + * send the signal, but the *info bits are lost. + */ + trace_signal_lose_info(sig, group, info); + } } out_set: -- cgit v0.10.2 From d180c5bccec02612256fd8076ff3c1fac3429553 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:48:30 +0900 Subject: sched: Introduce task_times() to replace task_{u,s}time() pair Functions task_{u,s}time() are called in pair in almost all cases. However task_stime() is implemented to call task_utime() from its inside, so such paired calls run task_utime() twice. It means we do heavy divisions (div_u64 + do_div) twice to get utime and stime which can be obtained at same time by one set of divisions. This patch introduces a function task_times(*tsk, *utime, *stime) to retrieve utime and stime at once in better, optimized way. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Americo Wang LKML-Reference: <4B0E16AE.906@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/fs/proc/array.c b/fs/proc/array.c index e209f64..330deda 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -535,8 +535,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, if (!whole) { min_flt = task->min_flt; maj_flt = task->maj_flt; - utime = task_utime(task); - stime = task_stime(task); + task_times(task, &utime, &stime); gtime = task_gtime(task); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 78ba664..fe6ae15 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1723,6 +1723,7 @@ static inline void put_task_struct(struct task_struct *t) extern cputime_t task_utime(struct task_struct *p); extern cputime_t task_stime(struct task_struct *p); extern cputime_t task_gtime(struct task_struct *p); +extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* * Per process flags diff --git a/kernel/exit.c b/kernel/exit.c index f7864ac..29068ab 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -91,6 +91,8 @@ static void __exit_signal(struct task_struct *tsk) if (atomic_dec_and_test(&sig->count)) posix_cpu_timers_exit_group(tsk); else { + cputime_t utime, stime; + /* * If there is any task waiting for the group exit * then notify it: @@ -110,8 +112,9 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - sig->utime = cputime_add(sig->utime, task_utime(tsk)); - sig->stime = cputime_add(sig->stime, task_stime(tsk)); + task_times(tsk, &utime, &stime); + sig->utime = cputime_add(sig->utime, utime); + sig->stime = cputime_add(sig->stime, stime); sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; diff --git a/kernel/sched.c b/kernel/sched.c index 315ba40..475a6f2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5191,6 +5191,14 @@ cputime_t task_stime(struct task_struct *p) { return p->stime; } + +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + if (ut) + *ut = task_utime(p); + if (st) + *st = task_stime(p); +} #else #ifndef nsecs_to_cputime @@ -5198,41 +5206,48 @@ cputime_t task_stime(struct task_struct *p) msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC)) #endif -cputime_t task_utime(struct task_struct *p) +void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - cputime_t utime = p->utime, total = utime + p->stime; - u64 temp; + cputime_t rtime, utime = p->utime, total = utime + p->stime; /* * Use CFS's precise accounting: */ - temp = (u64)nsecs_to_cputime(p->se.sum_exec_runtime); + rtime = nsecs_to_cputime(p->se.sum_exec_runtime); if (total) { - temp *= utime; + u64 temp; + + temp = (u64)(rtime * utime); do_div(temp, total); - } - utime = (cputime_t)temp; + utime = (cputime_t)temp; + } else + utime = rtime; + /* + * Compare with previous values, to keep monotonicity: + */ p->prev_utime = max(p->prev_utime, utime); - return p->prev_utime; + p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); + + if (ut) + *ut = p->prev_utime; + if (st) + *st = p->prev_stime; +} + +cputime_t task_utime(struct task_struct *p) +{ + cputime_t utime; + task_times(p, &utime, NULL); + return utime; } cputime_t task_stime(struct task_struct *p) { cputime_t stime; - - /* - * Use CFS's precise accounting. (we subtract utime from - * the total, to make sure the total observed by userspace - * grows monotonically - apps rely on that): - */ - stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p); - - if (stime >= 0) - p->prev_stime = max(p->prev_stime, stime); - - return p->prev_stime; + task_times(p, NULL, &stime); + return stime; } #endif diff --git a/kernel/sys.c b/kernel/sys.c index ce17760..bbdfce0 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1346,8 +1346,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) utime = stime = cputime_zero; if (who == RUSAGE_THREAD) { - utime = task_utime(current); - stime = task_stime(current); + task_times(current, &utime, &stime); accumulate_thread_rusage(p, r); maxrss = p->signal->maxrss; goto out; -- cgit v0.10.2 From d5b7c78e975302a1bab28263266c39ecb71acad4 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:49:05 +0900 Subject: sched: Remove task_{u,s,g}time() Now all task_{u,s}time() pairs are replaced by task_times(). And task_gtime() is too simple to be an inline function. Cleanup them all. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Americo Wang LKML-Reference: <4B0E16D1.70902@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/fs/proc/array.c b/fs/proc/array.c index 330deda..ca61a88 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -511,7 +511,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, do { min_flt += t->min_flt; maj_flt += t->maj_flt; - gtime = cputime_add(gtime, task_gtime(t)); + gtime = cputime_add(gtime, t->gtime); t = next_thread(t); } while (t != task); @@ -536,7 +536,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt = task->min_flt; maj_flt = task->maj_flt; task_times(task, &utime, &stime); - gtime = task_gtime(task); + gtime = task->gtime; } /* scale priority and nice values from timeslices to -20..20 */ diff --git a/include/linux/sched.h b/include/linux/sched.h index fe6ae15..0395b0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1720,9 +1720,6 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } -extern cputime_t task_utime(struct task_struct *p); -extern cputime_t task_stime(struct task_struct *p); -extern cputime_t task_gtime(struct task_struct *p); extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* diff --git a/kernel/exit.c b/kernel/exit.c index 29068ab..2eaf68b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -115,7 +115,7 @@ static void __exit_signal(struct task_struct *tsk) task_times(tsk, &utime, &stime); sig->utime = cputime_add(sig->utime, utime); sig->stime = cputime_add(sig->stime, stime); - sig->gtime = cputime_add(sig->gtime, task_gtime(tsk)); + sig->gtime = cputime_add(sig->gtime, tsk->gtime); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; sig->nvcsw += tsk->nvcsw; diff --git a/kernel/sched.c b/kernel/sched.c index 475a6f2..82251c2 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5182,22 +5182,12 @@ void account_idle_ticks(unsigned long ticks) * Use precise platform statistics if available: */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING -cputime_t task_utime(struct task_struct *p) -{ - return p->utime; -} - -cputime_t task_stime(struct task_struct *p) -{ - return p->stime; -} - void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { if (ut) - *ut = task_utime(p); + *ut = p->utime; if (st) - *st = task_stime(p); + *st = p->stime; } #else @@ -5235,27 +5225,8 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) if (st) *st = p->prev_stime; } - -cputime_t task_utime(struct task_struct *p) -{ - cputime_t utime; - task_times(p, &utime, NULL); - return utime; -} - -cputime_t task_stime(struct task_struct *p) -{ - cputime_t stime; - task_times(p, NULL, &stime); - return stime; -} #endif -inline cputime_t task_gtime(struct task_struct *p) -{ - return p->gtime; -} - /* * This function gets called by the timer code, with HZ frequency. * We call it with interrupts disabled. -- cgit v0.10.2 From b7b20df91d43d5e59578b8fc16e895c0c8cbd9b5 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 26 Nov 2009 14:49:27 +0900 Subject: sched, time: Define nsecs_to_jiffies() Use of msecs_to_jiffies() for nsecs_to_cputime() have some problems: - The type of msecs_to_jiffies()'s argument is unsigned int, so it cannot convert msecs greater than UINT_MAX = about 49.7 days. - msecs_to_jiffies() returns MAX_JIFFY_OFFSET if MSB of argument is set, assuming that input was negative value. So it cannot convert msecs greater than INT_MAX = about 24.8 days too. This patch defines a new function nsecs_to_jiffies() that can deal greater values, and that can deal all incoming values as unsigned. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Stanislaw Gruszka Cc: Spencer Candland Cc: Oleg Nesterov Cc: Balbir Singh Cc: Amrico Wang Cc: Thomas Gleixner Cc: John Stultz LKML-Reference: <4B0E16E7.5070307@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 1a9cf78bf..6811f4b 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -307,6 +307,7 @@ extern clock_t jiffies_to_clock_t(long x); extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); +extern unsigned long nsecs_to_jiffies(u64 n); #define TIMESTAMP_SIZE 30 diff --git a/kernel/sched.c b/kernel/sched.c index 82251c2..b3d4e2b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5192,8 +5192,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) #else #ifndef nsecs_to_cputime -# define nsecs_to_cputime(__nsecs) \ - msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC)) +# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) #endif void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) diff --git a/kernel/time.c b/kernel/time.c index 2e2e469..8047980 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -662,6 +662,36 @@ u64 nsec_to_clock_t(u64 x) #endif } +/** + * nsecs_to_jiffies - Convert nsecs in u64 to jiffies + * + * @n: nsecs in u64 + * + * Unlike {m,u}secs_to_jiffies, type of input is not unsigned int but u64. + * And this doesn't return MAX_JIFFY_OFFSET since this function is designed + * for scheduler, not for use in device drivers to calculate timeout value. + * + * note: + * NSEC_PER_SEC = 10^9 = (5^9 * 2^9) = (1953125 * 512) + * ULLONG_MAX ns = 18446744073.709551615 secs = about 584 years + */ +unsigned long nsecs_to_jiffies(u64 n) +{ +#if (NSEC_PER_SEC % HZ) == 0 + /* Common case, HZ = 100, 128, 200, 250, 256, 500, 512, 1000 etc. */ + return div_u64(n, NSEC_PER_SEC / HZ); +#elif (HZ % 512) == 0 + /* overflow after 292 years if HZ = 1024 */ + return div_u64(n * HZ / 512, NSEC_PER_SEC / 512); +#else + /* + * Generic case - optimized for cases where HZ is a multiple of 3. + * overflow after 64.99 years, exact for HZ = 60, 72, 90, 120 etc. + */ + return div_u64(n * 9, (9ull * NSEC_PER_SEC + HZ / 2) / HZ); +#endif +} + #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void) { -- cgit v0.10.2 From 4d795fb17a02a87e35782773b88b7a63acfbeaae Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Nov 2009 13:11:46 +0100 Subject: tracing: Fix kmem event exports Commit 53d0422 ("tracing: Convert some kmem events to DEFINE_EVENT") moved the kmem tracepoint creation from util.c to page_alloc.c, but forgot to move the exports. Move them back. Cc: Li Zefan Cc: Pekka Enberg Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mel Gorman LKML-Reference: <4B0E286A.2000405@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bdb22f5..2bc2ac6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -48,14 +48,12 @@ #include #include #include +#include #include #include #include "internal.h" -#define CREATE_TRACE_POINTS -#include - /* * Array of node states. */ diff --git a/mm/util.c b/mm/util.c index 15d1975..7c35ad9 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,6 +6,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate -- cgit v0.10.2 From b2e74a265ded1a185f762ebaab967e9e0d008dd8 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 26 Nov 2009 09:24:30 -0800 Subject: perf_events: Fix read() bogus counts when in error state When a pinned group cannot be scheduled it goes into error state. Normally a group cannot go out of error state without being explicitly re-enabled or disabled. There was a bug in per-thread mode, whereby upon termination of the thread, the group would transition from error to off leading to bogus counts and timing information returned by read(). Fix it by clearing the error state. Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: perfmon2-devel@lists.sourceforge.net LKML-Reference: <4b0eb9ce.0508d00a.573b.ffffeab6@mx.google.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f8c7939..0b9ca2d 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -338,7 +338,16 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) event->group_leader->nr_siblings--; update_event_times(event); - event->state = PERF_EVENT_STATE_OFF; + + /* + * If event was in error state, then keep it + * that way, otherwise bogus counts will be + * returned on read(). The only way to get out + * of error state is by explicit re-enabling + * of the event + */ + if (event->state > PERF_EVENT_STATE_OFF) + event->state = PERF_EVENT_STATE_OFF; /* * If this was a group event with sibling events then -- cgit v0.10.2 From e5af02261668350b43eb7381648930bde8e872f7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 27 Nov 2009 13:28:20 +1100 Subject: softlockup: Fix hung_task_check_count sysctl I'm seeing spikes of up to 0.5ms in khungtaskd on a large machine. To reduce this source of jitter I tried setting hung_task_check_count to 0: # echo 0 > /proc/sys/kernel/hung_task_check_count which didn't have the intended response. Change to a post increment of max_count, so a value of 0 means check 0 tasks. Signed-off-by: Anton Blanchard Acked-by: Frederic Weisbecker Cc: msb@google.com LKML-Reference: <20091127022820.GU32182@kryten> Signed-off-by: Ingo Molnar diff --git a/kernel/hung_task.c b/kernel/hung_task.c index d4e8417..0c642d5 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -144,7 +144,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) rcu_read_lock(); do_each_thread(g, t) { - if (!--max_count) + if (!max_count--) goto unlock; if (!--batch_count) { batch_count = HUNG_TASK_BATCHING; -- cgit v0.10.2 From 5fa10b28e57f94a90535cfeafe89dcee9f47d540 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Nov 2009 04:55:53 +0100 Subject: hw-breakpoints: Use struct perf_event_attr to define user breakpoints In-kernel user breakpoints are created using functions in which we pass breakpoint parameters as individual variables: address, length and type. Although it fits well for x86, this just does not scale across archictectures that may support this api later as these may have more or different needs. Pass in a perf_event_attr structure instead because it is meant to evolve as much as possible into a generic hardware breakpoint parameter structure. Reported-by: K.Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259294154-5197-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 75e0cd84..2941b32 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -593,6 +593,34 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } +static struct perf_event * +ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, + struct task_struct *tsk) +{ + int err; + int gen_len, gen_type; + DEFINE_BREAKPOINT_ATTR(attr); + + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ + if (!bp) + return ERR_PTR(-EINVAL); + + err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); + if (err) + return ERR_PTR(err); + + attr = bp->attr; + attr.bp_len = gen_len; + attr.bp_type = gen_type; + attr.disabled = 0; + + return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); +} + /* * Handle ptrace writes to debug register 7. */ @@ -603,7 +631,6 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) int i, orig_ret = 0, rc = 0; int enabled, second_pass = 0; unsigned len, type; - int gen_len, gen_type; struct perf_event *bp; data &= ~DR_CONTROL_RESERVED; @@ -634,33 +661,12 @@ restore: continue; } - /* - * We shoud have at least an inactive breakpoint at this - * slot. It means the user is writing dr7 without having - * written the address register first - */ - if (!bp) { - rc = -EINVAL; - break; - } - - rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); - if (rc) - break; - - /* - * This is a temporary thing as bp is unregistered/registered - * to simulate modification - */ - bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, - gen_type, bp->callback, - tsk, true); - thread->ptrace_bps[i] = NULL; + bp = ptrace_modify_breakpoint(bp, len, type, tsk); /* Incorrect bp, or we have a bug in bp API */ if (IS_ERR(bp)) { rc = PTR_ERR(bp); - bp = NULL; + thread->ptrace_bps[i] = NULL; break; } thread->ptrace_bps[i] = bp; @@ -707,24 +713,26 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, { struct perf_event *bp; struct thread_struct *t = &tsk->thread; + DEFINE_BREAKPOINT_ATTR(attr); if (!t->ptrace_bps[nr]) { /* * Put stub len and type to register (reserve) an inactive but * correct bp */ - bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, - HW_BREAKPOINT_W, - ptrace_triggered, tsk, - false); + attr.bp_addr = addr; + attr.bp_len = HW_BREAKPOINT_LEN_1; + attr.bp_type = HW_BREAKPOINT_W; + attr.disabled = 1; + + bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); } else { bp = t->ptrace_bps[nr]; t->ptrace_bps[nr] = NULL; - bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, - bp->attr.bp_type, - bp->callback, - tsk, - bp->attr.disabled); + + attr = bp->attr; + attr.bp_addr = addr; + bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); } /* * CHECKME: the previous code returned -EIO if the addr wasn't a diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index c9f7f7c..5da472e 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -20,6 +20,14 @@ enum { #ifdef CONFIG_HAVE_HW_BREAKPOINT +/* As it's for in-kernel or ptrace use, we want it to be pinned */ +#define DEFINE_BREAKPOINT_ATTR(name) \ +struct perf_event_attr name = { \ + .type = PERF_TYPE_BREAKPOINT, \ + .size = sizeof(name), \ + .pinned = 1, \ +}; + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -36,22 +44,16 @@ static inline int hw_breakpoint_len(struct perf_event *bp) } extern struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active); + struct task_struct *tsk); /* FIXME: only change from the attr, and don't unregister */ extern struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, + struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active); + struct task_struct *tsk); /* * Kernel breakpoints are not associated with any particular thread. @@ -89,20 +91,14 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) #else /* !CONFIG_HAVE_HW_BREAKPOINT */ static inline struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } + struct task_struct *tsk) { return NULL; } static inline struct perf_event * modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, + struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } + struct task_struct *tsk) { return NULL; } static inline struct perf_event * register_wide_hw_breakpoint_cpu(unsigned long addr, int len, diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 32e1018..2a47514 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -289,90 +289,32 @@ int register_perf_hw_breakpoint(struct perf_event *bp) return __register_perf_hw_breakpoint(bp); } -/* - * Register a breakpoint bound to a task and a given cpu. - * If cpu is -1, the breakpoint is active for the task in every cpu - * If the task is -1, the breakpoint is active for every tasks in the given - * cpu. - */ -static struct perf_event * -register_user_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - pid_t pid, - int cpu, - bool active) -{ - struct perf_event_attr *attr; - struct perf_event *bp; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - if (!attr) - return ERR_PTR(-ENOMEM); - - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(*attr); - attr->bp_addr = addr; - attr->bp_len = len; - attr->bp_type = type; - /* - * Such breakpoints are used by debuggers to trigger signals when - * we hit the excepted memory op. We can't miss such events, they - * must be pinned. - */ - attr->pinned = 1; - - if (!active) - attr->disabled = 1; - - bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); - kfree(attr); - - return bp; -} - /** * register_user_hw_breakpoint - register a hardware breakpoint for user space - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it - * */ struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, +register_user_hw_breakpoint(struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) + struct task_struct *tsk) { - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - tsk->pid, -1, active); + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint * @bp: the breakpoint structure to modify - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: new breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it */ struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, +modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, perf_callback_t triggered, - struct task_struct *tsk, - bool active) + struct task_struct *tsk) { /* * FIXME: do it without unregistering @@ -381,8 +323,7 @@ modify_user_hw_breakpoint(struct perf_event *bp, */ unregister_hw_breakpoint(bp); - return register_user_hw_breakpoint(addr, len, type, triggered, - tsk, active); + return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); @@ -406,8 +347,16 @@ register_kernel_hw_breakpoint_cpu(unsigned long addr, int cpu, bool active) { - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - -1, cpu, active); + DEFINE_BREAKPOINT_ATTR(attr); + + attr.bp_addr = addr; + attr.bp_len = len; + attr.bp_type = type; + + if (!active) + attr.disabled = 1; + + return perf_event_create_kernel_counter(&attr, cpu, -1, triggered); } /** -- cgit v0.10.2 From dd1853c3f493f6d22d9e5390b192a07b73d2ac0a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Nov 2009 04:55:54 +0100 Subject: hw-breakpoints: Use struct perf_event_attr to define kernel breakpoints Kernel breakpoints are created using functions in which we pass breakpoint parameters as individual variables: address, length and type. Although it fits well for x86, this just does not scale across architectures that may support this api later as these may have more or different needs. Pass in a perf_event_attr structure instead because it is meant to evolve as much as possible into a generic hardware breakpoint parameter structure. Reported-by: K.Prasad Signed-off-by: Frederic Weisbecker LKML-Reference: <1259294154-5197-2-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 5da472e..a03daed 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -28,6 +28,13 @@ struct perf_event_attr name = { \ .pinned = 1, \ }; +static inline void hw_breakpoint_init(struct perf_event_attr *attr) +{ + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(*attr); + attr->pinned = 1; +} + static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) { return bp->attr.bp_addr; @@ -59,19 +66,13 @@ modify_user_hw_breakpoint(struct perf_event *bp, * Kernel breakpoints are not associated with any particular thread. */ extern struct perf_event * -register_wide_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, +register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_callback_t triggered, - int cpu, - bool active); + int cpu); extern struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active); +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered); extern int register_perf_hw_breakpoint(struct perf_event *bp); extern int __register_perf_hw_breakpoint(struct perf_event *bp); @@ -100,18 +101,12 @@ modify_user_hw_breakpoint(struct perf_event *bp, perf_callback_t triggered, struct task_struct *tsk) { return NULL; } static inline struct perf_event * -register_wide_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, +register_wide_hw_breakpoint_cpu(struct perf_event_attr *attr, perf_callback_t triggered, - int cpu, - bool active) { return NULL; } + int cpu) { return NULL; } static inline struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active) { return NULL; } +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered) { return NULL; } static inline int register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } static inline int diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2a47514..cf5ee16 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -339,42 +339,16 @@ void unregister_hw_breakpoint(struct perf_event *bp) } EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); -static struct perf_event * -register_kernel_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - int cpu, - bool active) -{ - DEFINE_BREAKPOINT_ATTR(attr); - - attr.bp_addr = addr; - attr.bp_len = len; - attr.bp_type = type; - - if (!active) - attr.disabled = 1; - - return perf_event_create_kernel_counter(&attr, cpu, -1, triggered); -} - /** * register_wide_hw_breakpoint - register a wide breakpoint in the kernel - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) + * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint - * @active: should we activate it while registering it * * @return a set of per_cpu pointers to perf events */ struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active) +register_wide_hw_breakpoint(struct perf_event_attr *attr, + perf_callback_t triggered) { struct perf_event **cpu_events, **pevent, *bp; long err; @@ -386,8 +360,7 @@ register_wide_hw_breakpoint(unsigned long addr, for_each_possible_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - bp = register_kernel_hw_breakpoint_cpu(addr, len, type, - triggered, cpu, active); + bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); *pevent = bp; diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index c538b15..ddfa0fd 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -42,9 +42,7 @@ struct trace_ksym { struct perf_event **ksym_hbp; - unsigned long ksym_addr; - int type; - int len; + struct perf_event_attr attr; #ifdef CONFIG_PROFILE_KSYM_TRACER unsigned long counter; #endif @@ -71,7 +69,7 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) rcu_read_lock(); hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - if ((entry->ksym_addr == hbp_hit_addr) && + if ((entry->attr.bp_addr == hbp_hit_addr) && (entry->counter <= MAX_UL_INT)) { entry->counter++; break; @@ -192,14 +190,15 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) if (!entry) return -ENOMEM; - entry->type = op; - entry->ksym_addr = addr; - entry->len = HW_BREAKPOINT_LEN_4; + hw_breakpoint_init(&entry->attr); + + entry->attr.bp_type = op; + entry->attr.bp_addr = addr; + entry->attr.bp_len = HW_BREAKPOINT_LEN_4; ret = -EAGAIN; - entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, - entry->len, entry->type, - ksym_hbp_handler, true); + entry->ksym_hbp = register_wide_hw_breakpoint(&entry->attr, + ksym_hbp_handler); if (IS_ERR(entry->ksym_hbp)) { ret = PTR_ERR(entry->ksym_hbp); @@ -236,12 +235,12 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, mutex_lock(&ksym_tracer_mutex); hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); - if (entry->type == HW_BREAKPOINT_R) + ret = trace_seq_printf(s, "%pS:", (void *)entry->attr.bp_addr); + if (entry->attr.bp_type == HW_BREAKPOINT_R) ret = trace_seq_puts(s, "r--\n"); - else if (entry->type == HW_BREAKPOINT_W) + else if (entry->attr.bp_type == HW_BREAKPOINT_W) ret = trace_seq_puts(s, "-w-\n"); - else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) + else if (entry->attr.bp_type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) ret = trace_seq_puts(s, "rw-\n"); WARN_ON_ONCE(!ret); } @@ -317,9 +316,9 @@ static ssize_t ksym_trace_filter_write(struct file *file, ret = -EINVAL; hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - if (entry->ksym_addr == ksym_addr) { + if (entry->attr.bp_addr == ksym_addr) { /* Check for malformed request: (6) */ - if (entry->type != op) + if (entry->attr.bp_type != op) changed = 1; else goto out; @@ -328,13 +327,12 @@ static ssize_t ksym_trace_filter_write(struct file *file, } if (changed) { unregister_wide_hw_breakpoint(entry->ksym_hbp); - entry->type = op; + entry->attr.bp_type = op; ret = 0; if (op > 0) { entry->ksym_hbp = - register_wide_hw_breakpoint(entry->ksym_addr, - entry->len, entry->type, - ksym_hbp_handler, true); + register_wide_hw_breakpoint(&entry->attr, + ksym_hbp_handler); if (IS_ERR(entry->ksym_hbp)) ret = PTR_ERR(entry->ksym_hbp); else @@ -489,7 +487,7 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); - access_type = entry->type; + access_type = entry->attr.bp_type; switch (access_type) { case HW_BREAKPOINT_R: @@ -505,7 +503,7 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) seq_puts(m, " NA "); } - if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) + if (lookup_symbol_name(entry->attr.bp_addr, fn_name) >= 0) seq_printf(m, " %-36s", fn_name); else seq_printf(m, " %-36s", ""); diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c index ee7f9fb..2952550 100644 --- a/samples/hw_breakpoint/data_breakpoint.c +++ b/samples/hw_breakpoint/data_breakpoint.c @@ -51,13 +51,13 @@ static void sample_hbp_handler(struct perf_event *temp, void *data) static int __init hw_break_module_init(void) { int ret; - unsigned long addr; + DEFINE_BREAKPOINT_ATTR(attr); - addr = kallsyms_lookup_name(ksym_name); + attr.bp_addr = kallsyms_lookup_name(ksym_name); + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4, - HW_BREAKPOINT_W | HW_BREAKPOINT_R, - sample_hbp_handler, true); + sample_hbp = register_wide_hw_breakpoint(&attr, sample_hbp_handler); if (IS_ERR(sample_hbp)) { ret = PTR_ERR(sample_hbp); goto fail; -- cgit v0.10.2 From 0f1ef51d244809f417bdf45cdb00109fb6005672 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 26 Nov 2009 15:49:33 +0800 Subject: trace_syscalls: Add syscall nr field Field syscall number is missed in syscall_enter_define_fields()/ syscall_exit_define_fields(). Syscall number is also needed for event filter or other users. Signed-off-by: Lai Jiangshan Acked-by: Frederic Weisbecker Cc: Jason Baron Cc: Steven Rostedt LKML-Reference: <4B0E330D.1070206@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9189cbe..63aa807 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -261,6 +261,10 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) if (ret) return ret; + ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); + if (ret) + return ret; + for (i = 0; i < meta->nb_args; i++) { ret = trace_define_field(call, meta->types[i], meta->args[i], offset, @@ -281,6 +285,10 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; + ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER); + if (ret) + return ret; + ret = trace_define_field(call, SYSCALL_FIELD(long, ret), FILTER_OTHER); -- cgit v0.10.2 From abab9d37d2a826fcf588c5f30152dbe05c40111c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 25 Nov 2009 16:32:21 +0800 Subject: trace_kprobes: Fix memory leak tp->nr_args is not set before we "goto error", it causes memory leak for free_trace_probe() use tp->nr_args to free memory of args. Signed-off-by: Lai Jiangshan Acked-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0CEB95.2060107@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 79ce6a2..82e8583 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -704,10 +704,12 @@ static int create_trace_probe(int argc, char **argv) ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); if (ret) { pr_info("Parse error at argument%d. (%d)\n", i, ret); + kfree(tp->args[i].name); goto error; } + + tp->nr_args++; } - tp->nr_args = i; ret = register_trace_probe(tp); if (ret) -- cgit v0.10.2 From 3d9b2e1ddf42dd3df38af7794fa5e39cce760f3b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 25 Nov 2009 16:32:47 +0800 Subject: trace_kprobes: Always show group name Sometimes the group name is not "kprobes", It'll be better if we can read it from tracing/kprobe_events. # echo 'r:laijs/vfs_read vfs_read %ax' > kprobe_events # cat kprobe_events r:laijs/vfs_read vfs_read %ax=%ax Signed-off-by: Lai Jiangshan Acked-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0CEBAF.6000104@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 82e8583..96e1944 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -760,7 +760,7 @@ static int probes_seq_show(struct seq_file *m, void *v) char buf[MAX_ARGSTR_LEN + 1]; seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); - seq_printf(m, ":%s", tp->call.name); + seq_printf(m, ":%s/%s", tp->call.system, tp->call.name); if (tp->symbol) seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); -- cgit v0.10.2 From 52a11f354970e7301e1d1a029b87535be45abec9 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 25 Nov 2009 16:33:15 +0800 Subject: trace_kprobes: Don't output zero offset "symbol_name+0" is not so friendly. It makes the output longer. Signed-off-by: Lai Jiangshan Acked-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B0CEBCB.7080309@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 96e1944..72d0c65 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -243,7 +243,11 @@ static int probe_arg_string(char *buf, size_t n, struct fetch_func *ff) ret = snprintf(buf, n, "@0x%p", ff->data); else if (ff->func == fetch_symbol) { struct symbol_cache *sc = ff->data; - ret = snprintf(buf, n, "@%s%+ld", sc->symbol, sc->offset); + if (sc->offset) + ret = snprintf(buf, n, "@%s%+ld", sc->symbol, + sc->offset); + else + ret = snprintf(buf, n, "@%s", sc->symbol); } else if (ff->func == fetch_retvalue) ret = snprintf(buf, n, "$retval"); else if (ff->func == fetch_stack_address) @@ -762,10 +766,12 @@ static int probes_seq_show(struct seq_file *m, void *v) seq_printf(m, "%c", probe_is_return(tp) ? 'r' : 'p'); seq_printf(m, ":%s/%s", tp->call.system, tp->call.name); - if (tp->symbol) + if (!tp->symbol) + seq_printf(m, " 0x%p", tp->rp.kp.addr); + else if (tp->rp.kp.offset) seq_printf(m, " %s+%u", probe_symbol(tp), tp->rp.kp.offset); else - seq_printf(m, " 0x%p", tp->rp.kp.addr); + seq_printf(m, " %s", probe_symbol(tp)); for (i = 0; i < tp->nr_args; i++) { ret = probe_arg_string(buf, MAX_ARGSTR_LEN, &tp->args[i].fetch); -- cgit v0.10.2 From 6a9401a7ac13e62ef2baf4d46e022d303edc3050 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 13:22:21 +0100 Subject: x86/amd-iommu: Separate internal interface definitions This patch moves all function declarations which are only used inside the driver code to a seperate header file. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index b8ef2ee..0891338 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -23,15 +23,13 @@ #include #ifdef CONFIG_AMD_IOMMU -extern int amd_iommu_init_dma_ops(void); -extern int amd_iommu_init_passthrough(void); + extern void amd_iommu_detect(void); -extern irqreturn_t amd_iommu_int_handler(int irq, void *data); -extern void amd_iommu_flush_all_domains(void); -extern void amd_iommu_flush_all_devices(void); -extern void amd_iommu_apply_erratum_63(u16 devid); + #else + static inline void amd_iommu_detect(void) { } + #endif #endif /* _ASM_X86_AMD_IOMMU_H */ diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h new file mode 100644 index 0000000..84786fb --- /dev/null +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2009 Advanced Micro Devices, Inc. + * Author: Joerg Roedel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_X86_AMD_IOMMU_PROTO_H +#define _ASM_X86_AMD_IOMMU_PROTO_H + +struct amd_iommu; + +extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); +extern irqreturn_t amd_iommu_int_handler(int irq, void *data); +extern void amd_iommu_flush_all_domains(void); +extern void amd_iommu_flush_all_devices(void); +extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); + +#ifndef CONFIG_AMD_IOMMU_STATS + +static inline void amd_iommu_stats_init(void) { } + +#endif /* !CONFIG_AMD_IOMMU_STATS */ + +#endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 2a2cc7a..27db7f9 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -462,11 +462,6 @@ struct __iommu_counter { #define ADD_STATS_COUNTER(name, x) #define SUB_STATS_COUNTER(name, x) -static inline void amd_iommu_stats_init(void) { } - #endif /* CONFIG_AMD_IOMMU_STATS */ -/* some function prototypes */ -extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); - #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b74b212..50d2b05 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 72bdbda..db30cfe 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From bf3118c1276d27fe9e84aa42382da25ee0750777 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 13:39:19 +0100 Subject: x86/amd-iommu: Update copyright headers This patch updates the copyright headers in the relevant AMD IOMMU driver files to match the date of the latest changes. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index 0891338..5af2982 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 27db7f9..df5e9c8 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 50d2b05..7fe28be 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index db30cfe..cee1142 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. * Author: Joerg Roedel * Leo Duran * -- cgit v0.10.2 From bb52777ec4d736c2d7c4f037b32d4eeeb172ed89 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 14:31:51 +0100 Subject: x86/amd-iommu: Add an index field to struct amd_iommu This patch adds an index field to struct amd_iommu which can be used to lookup it up in an array. This index will be used in struct protection_domain to keep track which protection domain has devices behind which IOMMU. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index df5e9c8..ab3e7bf 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -25,6 +25,11 @@ #include /* + * Maximum number of IOMMUs supported + */ +#define MAX_IOMMUS 32 + +/* * some size calculation constants */ #define DEV_TABLE_ENTRY_SIZE 32 @@ -291,6 +296,9 @@ struct dma_ops_domain { struct amd_iommu { struct list_head list; + /* Index within the IOMMU array */ + int index; + /* locks the accesses to the hardware */ spinlock_t lock; @@ -357,6 +365,15 @@ struct amd_iommu { extern struct list_head amd_iommu_list; /* + * Array with pointers to each IOMMU struct + * The indices are referenced in the protection domains + */ +extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; + +/* Number of IOMMUs present in the system */ +extern int amd_iommus_present; + +/* * Structure defining one entry in the device table */ struct dev_table_entry { diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index cee1142..8567d16 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -137,6 +137,10 @@ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the system */ +/* Array to assign indices to IOMMUs*/ +struct amd_iommu *amd_iommus[MAX_IOMMUS]; +int amd_iommus_present; + /* * Pointer to the device table which is shared by all AMD IOMMUs * it is indexed by the PCI device id or the HT unit id and contains @@ -840,7 +844,18 @@ static void __init free_iommu_all(void) static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) { spin_lock_init(&iommu->lock); + + /* Add IOMMU to internal data structures */ list_add_tail(&iommu->list, &amd_iommu_list); + iommu->index = amd_iommus_present++; + + if (unlikely(iommu->index >= MAX_IOMMUS)) { + WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); + return -ENOSYS; + } + + /* Index is fine - add IOMMU to the array */ + amd_iommus[iommu->index] = iommu; /* * Copy data from ACPI table entry to the iommu struct -- cgit v0.10.2 From c459611424d8b8396060eb766e23bd0c70c993bc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 14:57:32 +0100 Subject: x86/amd-iommu: Add per IOMMU reference counting This patch adds reference counting for protection domains per IOMMU. This allows a smarter TLB flushing strategy. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ab3e7bf..e68b148 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -238,7 +238,9 @@ struct protection_domain { unsigned long flags; /* flags to find out type of domain */ bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ + unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ void *priv; /* private data */ + }; /* diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7fe28be..8c38f00 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1175,7 +1175,9 @@ static void __attach_device(struct amd_iommu *iommu, /* update DTE entry */ set_dte_entry(devid, domain); - domain->dev_cnt += 1; + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; /* ready */ spin_unlock(&domain->lock); @@ -1209,6 +1211,9 @@ static void attach_device(struct amd_iommu *iommu, */ static void __detach_device(struct protection_domain *domain, u16 devid) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + BUG_ON(!iommu); /* lock domain */ spin_lock(&domain->lock); @@ -1223,8 +1228,9 @@ static void __detach_device(struct protection_domain *domain, u16 devid) amd_iommu_apply_erratum_63(devid); - /* decrease reference counter */ - domain->dev_cnt -= 1; + /* decrease reference counters */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; /* ready */ spin_unlock(&domain->lock); -- cgit v0.10.2 From 0518a3a4585cb3eeeaf14ca57131f11d252130c6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:00:05 +0100 Subject: x86/amd-iommu: Add function to complete a tlb flush This patch adds a function to the AMD IOMMU driver which completes all queued commands an all IOMMUs a specific domain has devices attached on. This is required in a later patch when per-domain flushing is implemented. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 8c38f00..8fa5cc3 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -376,6 +376,22 @@ out: return 0; } +static void iommu_flush_complete(struct protection_domain *domain) +{ + int i; + + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need to wait for completion of all commands. + */ + iommu_completion_wait(amd_iommus[i]); + } +} + /* * Command send function for invalidating a device table entry */ @@ -1758,7 +1774,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, if (addr == DMA_ERROR_CODE) goto out; - iommu_completion_wait(iommu); + iommu_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1791,7 +1807,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1863,7 +1879,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1914,7 +1930,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1969,7 +1985,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out_free; } - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); @@ -2010,7 +2026,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - iommu_completion_wait(iommu); + iommu_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); -- cgit v0.10.2 From 6de8ad9b9ee0ec5b52ec8ec41401833e5e89186f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 18:30:32 +0100 Subject: x86/amd-iommu: Make iommu_flush_pages aware of multiple IOMMUs This patch extends the iommu_flush_pages function to flush the TLB entries on all IOMMUs the domain has devices on. This basically gives up the former assumption that dma_ops domains are only bound to one IOMMU in the system. For dma_ops domains this is still true but not for IOMMU-API managed domains. Giving this assumption up for dma_ops domains too allows code simplification. Further it splits out the main logic into a generic function which can be used by iommu_flush_tlb too. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 8fa5cc3..7c06e57 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -447,10 +447,10 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, * It invalidates a single PTE if the range to flush is within a single * page. Otherwise it flushes the whole TLB of the IOMMU. */ -static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, - u64 address, size_t size) +static void __iommu_flush_pages(struct protection_domain *domain, + u64 address, size_t size, int pde) { - int s = 0; + int s = 0, i; unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); address &= PAGE_MASK; @@ -464,9 +464,26 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, s = 1; } - iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); - return 0; + for (i = 0; i < amd_iommus_present; ++i) { + if (!domain->dev_iommu[i]) + continue; + + /* + * Devices of this domain are behind this IOMMU + * We need a TLB flush + */ + iommu_queue_inv_iommu_pages(amd_iommus[i], address, + domain->id, pde, s); + } + + return; +} + +static void iommu_flush_pages(struct protection_domain *domain, + u64 address, size_t size) +{ + __iommu_flush_pages(domain, address, size, 0); } /* Flush the whole IO/TLB for a given protection domain */ @@ -1683,7 +1700,7 @@ retry: iommu_flush_tlb(iommu, dma_dom->domain.id); dma_dom->need_flush = false; } else if (unlikely(iommu_has_npcache(iommu))) - iommu_flush_pages(iommu, dma_dom->domain.id, address, size); + iommu_flush_pages(&dma_dom->domain, address, size); out: return address; @@ -1731,7 +1748,7 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + iommu_flush_pages(&dma_dom->domain, dma_addr, size); dma_dom->need_flush = false; } } -- cgit v0.10.2 From dcd1e92e405449ecc5e8bd8fcfebf3b2a13d3d37 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 15:30:58 +0100 Subject: x86/amd-iommu: Use __iommu_flush_pages for tlb flushes This patch re-implements iommu_flush_tlb functions to use the __iommu_flush_pages logic. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7c06e57..c55aa07 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -451,7 +451,7 @@ static void __iommu_flush_pages(struct protection_domain *domain, u64 address, size_t size, int pde) { int s = 0, i; - unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); + unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); address &= PAGE_MASK; @@ -487,23 +487,15 @@ static void iommu_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain */ -static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_tlb(struct protection_domain *domain) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - - INC_STATS_COUNTER(domain_flush_single); - - iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); + __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) +static void iommu_flush_tlb_pde(struct protection_domain *domain) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - - INC_STATS_COUNTER(domain_flush_single); - - iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); + __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } /* @@ -1236,7 +1228,7 @@ static void attach_device(struct amd_iommu *iommu, * here to evict all dirty stuff. */ iommu_queue_inv_dev_entry(iommu, devid); - iommu_flush_tlb_pde(iommu, domain->id); + iommu_flush_tlb_pde(domain); } /* @@ -1697,7 +1689,7 @@ retry: ADD_STATS_COUNTER(alloced_io_mem, size); if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { - iommu_flush_tlb(iommu, dma_dom->domain.id); + iommu_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; } else if (unlikely(iommu_has_npcache(iommu))) iommu_flush_pages(&dma_dom->domain, address, size); -- cgit v0.10.2 From 601367d76bd19b7eea2286ae99e5b1cb5d74f38d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:08:55 +0100 Subject: x86/amd-iommu: Remove iommu_flush_domain function This iommu_flush_tlb_pde function does essentially the same. So the iommu_flush_domain function is redundant and can be removed. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c55aa07..b2c19f4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -528,20 +528,6 @@ static void flush_all_domains_on_iommu(struct amd_iommu *iommu) } -/* - * This function is used to flush the IO/TLB for a given protection domain - * on every IOMMU in the system - */ -static void iommu_flush_domain(u16 domid) -{ - struct amd_iommu *iommu; - - INC_STATS_COUNTER(domain_flush_all); - - for_each_iommu(iommu) - flush_domain_on_iommu(iommu, domid); -} - void amd_iommu_flush_all_domains(void) { struct amd_iommu *iommu; @@ -1464,7 +1450,7 @@ static void update_domain(struct protection_domain *domain) update_device_table(domain); flush_devices_by_domain(domain); - iommu_flush_domain(domain->id); + iommu_flush_tlb_pde(domain); domain->updated = false; } @@ -2377,7 +2363,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova += PAGE_SIZE; } - iommu_flush_domain(domain->id); + iommu_flush_tlb_pde(domain); } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, -- cgit v0.10.2 From aeb26f55337d4310840c8adc3ec7d6aebb714472 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:44:01 +0100 Subject: x86/amd-iommu: Implement protection domain list This patch adds code to keep a global list of all protection domains. This allows to simplify the resume code. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index e68b148..b332b7f 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -231,6 +231,7 @@ extern bool amd_iommu_dump; * independent of their use. */ struct protection_domain { + struct list_head list; /* for list of all protection domains */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ @@ -376,6 +377,12 @@ extern struct amd_iommu *amd_iommus[MAX_IOMMUS]; extern int amd_iommus_present; /* + * Declarations for the global list of all protection domains + */ +extern spinlock_t amd_iommu_pd_lock; +extern struct list_head amd_iommu_pd_list; + +/* * Structure defining one entry in the device table */ struct dev_table_entry { diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b2c19f4..0c4319b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -985,6 +985,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, * ****************************************************************************/ +/* + * This function adds a protection domain to the global protection domain list + */ +static void add_domain_to_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_add(&domain->list, &amd_iommu_pd_list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + +/* + * This function removes a protection domain to the global + * protection domain list + */ +static void del_domain_from_list(struct protection_domain *domain) +{ + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); + list_del(&domain->list); + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); +} + static u16 domain_id_alloc(void) { unsigned long flags; @@ -1073,6 +1098,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; + del_domain_from_list(&dom->domain); + free_pagetable(&dom->domain); for (i = 0; i < APERTURE_MAX_RANGES; ++i) { @@ -1113,6 +1140,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) dma_dom->need_flush = false; dma_dom->target_dev = 0xffff; + add_domain_to_list(&dma_dom->domain); + if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) goto free_dma_dom; @@ -2188,6 +2217,8 @@ static void protection_domain_free(struct protection_domain *domain) if (!domain) return; + del_domain_from_list(domain); + if (domain->id) domain_id_free(domain->id); @@ -2207,6 +2238,8 @@ static struct protection_domain *protection_domain_alloc(void) if (!domain->id) goto out_err; + add_domain_to_list(domain); + return domain; out_err: diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8567d16..73d5173 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -142,6 +142,12 @@ struct amd_iommu *amd_iommus[MAX_IOMMUS]; int amd_iommus_present; /* + * List of protection domains - used during resume + */ +LIST_HEAD(amd_iommu_pd_list); +spinlock_t amd_iommu_pd_lock; + +/* * Pointer to the device table which is shared by all AMD IOMMUs * it is indexed by the PCI device id or the HT unit id and contains * information about the domain the device belongs to as well as the @@ -1263,6 +1269,8 @@ static int __init amd_iommu_init(void) */ amd_iommu_pd_alloc_bitmap[0] = 1; + spin_lock_init(&amd_iommu_pd_lock); + /* * now the data structures are allocated and basically initialized * start the real acpi table scan -- cgit v0.10.2 From e3306664eb307ae4cc93211cd9f12d0dbd49de65 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 16:48:58 +0100 Subject: x86/amd-iommu: Reimplement amd_iommu_flush_all_domains() This patch reimplementes the amd_iommu_flush_all_domains function to use the global protection domain list instead of flushing every domain on every IOMMU. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0c4319b..5141f56 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -530,10 +530,12 @@ static void flush_all_domains_on_iommu(struct amd_iommu *iommu) void amd_iommu_flush_all_domains(void) { - struct amd_iommu *iommu; + struct protection_domain *domain; - for_each_iommu(iommu) - flush_all_domains_on_iommu(iommu); + list_for_each_entry(domain, &amd_iommu_pd_list, list) { + iommu_flush_tlb_pde(domain); + iommu_flush_complete(domain); + } } static void flush_all_devices_for_iommu(struct amd_iommu *iommu) -- cgit v0.10.2 From 09b4280439ef6fdc55f1353a9135034336eb5d26 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 20 Nov 2009 17:02:44 +0100 Subject: x86/amd-iommu: Reimplement flush_all_domains_on_iommu() This patch reimplements the function flush_all_domains_on_iommu to use the global protection domain list. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5141f56..a1bd99d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -499,43 +499,48 @@ static void iommu_flush_tlb_pde(struct protection_domain *domain) } /* - * This function flushes one domain on one IOMMU + * This function flushes all domains that have devices on the given IOMMU */ -static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) +static void flush_all_domains_on_iommu(struct amd_iommu *iommu) { - struct iommu_cmd cmd; + u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + struct protection_domain *domain; unsigned long flags; - __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - domid, 1, 1); - - spin_lock_irqsave(&iommu->lock, flags); - __iommu_queue_command(iommu, &cmd); - __iommu_completion_wait(iommu); - __iommu_wait_for_completion(iommu); - spin_unlock_irqrestore(&iommu->lock, flags); -} - -static void flush_all_domains_on_iommu(struct amd_iommu *iommu) -{ - int i; + spin_lock_irqsave(&amd_iommu_pd_lock, flags); - for (i = 1; i < MAX_DOMAIN_ID; ++i) { - if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) + list_for_each_entry(domain, &amd_iommu_pd_list, list) { + if (domain->dev_iommu[iommu->index] == 0) continue; - flush_domain_on_iommu(iommu, i); + + spin_lock(&domain->lock); + iommu_queue_inv_iommu_pages(iommu, address, domain->id, 1, 1); + iommu_flush_complete(domain); + spin_unlock(&domain->lock); } + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } +/* + * This function uses heavy locking and may disable irqs for some time. But + * this is no issue because it is only called during resume. + */ void amd_iommu_flush_all_domains(void) { struct protection_domain *domain; + unsigned long flags; + + spin_lock_irqsave(&amd_iommu_pd_lock, flags); list_for_each_entry(domain, &amd_iommu_pd_list, list) { + spin_lock(&domain->lock); iommu_flush_tlb_pde(domain); iommu_flush_complete(domain); + spin_unlock(&domain->lock); } + + spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } static void flush_all_devices_for_iommu(struct amd_iommu *iommu) -- cgit v0.10.2 From 318afd41d2eca3224de3fd85a3b9a27a3010a98d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 18:32:38 +0100 Subject: x86/amd-iommu: Make np-cache a global flag The non-present cache flag was IOMMU local until now which doesn't make sense. Make this a global flag so we can remove the lase user of 'struct iommu' in the map/unmap path. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index b332b7f..4899f783 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -211,6 +211,9 @@ extern bool amd_iommu_dump; printk(KERN_INFO "AMD-Vi: " format, ## arg); \ } while(0); +/* global flag if IOMMUs cache non-present entries */ +extern bool amd_iommu_np_cache; + /* * Make iterating over all IOMMUs easier */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a1bd99d..5ebd24e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -131,12 +131,6 @@ static void amd_iommu_stats_init(void) #endif -/* returns !0 if the IOMMU is caching non-present entries in its TLB */ -static int iommu_has_npcache(struct amd_iommu *iommu) -{ - return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); -} - /**************************************************************************** * * Interrupt handling functions @@ -1713,7 +1707,7 @@ retry: if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { iommu_flush_tlb(&dma_dom->domain); dma_dom->need_flush = false; - } else if (unlikely(iommu_has_npcache(iommu))) + } else if (unlikely(amd_iommu_np_cache)) iommu_flush_pages(&dma_dom->domain, address, size); out: diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 73d5173..fbe4c3c 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -141,6 +141,9 @@ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the struct amd_iommu *amd_iommus[MAX_IOMMUS]; int amd_iommus_present; +/* IOMMUs have a non-present cache? */ +bool amd_iommu_np_cache __read_mostly; + /* * List of protection domains - used during resume */ @@ -891,6 +894,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) init_iommu_from_acpi(iommu, h); init_iommu_devices(iommu); + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) + amd_iommu_np_cache = true; + return pci_enable_device(iommu->dev); } -- cgit v0.10.2 From 420aef8a3acfc3e75427107e23d5a9bafd17c477 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 16:14:57 +0100 Subject: x86/amd-iommu: Use check_device for amd_iommu_dma_supported The check_device logic needs to include the dma_supported checks to be really sure. Merge the dma_supported logic into check_device and use it to implement dma_supported. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5ebd24e..ac27b1d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1375,9 +1375,27 @@ static struct notifier_block device_nb = { */ static bool check_device(struct device *dev) { + u16 bdf; + struct pci_dev *pcidev; + if (!dev || !dev->dma_mask) return false; + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return false; + + pcidev = to_pci_dev(dev); + + bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + + /* Out of our scope? */ + if (bdf > amd_iommu_last_bdf) + return false; + + if (amd_iommu_rlookup_table[bdf] == NULL) + return false; + return true; } @@ -2065,22 +2083,7 @@ free_mem: */ static int amd_iommu_dma_supported(struct device *dev, u64 mask) { - u16 bdf; - struct pci_dev *pcidev; - - /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) - return 0; - - pcidev = to_pci_dev(dev); - - bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - - /* Out of our scope? */ - if (bdf > amd_iommu_last_bdf) - return 0; - - return 1; + return check_device(dev); } /* -- cgit v0.10.2 From f99c0f1c75f75924a6f19cb40a21ccefc6e8754d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 16:52:56 +0100 Subject: x86/amd-iommu: Use check_device in get_device_resources Every call-place of get_device_resources calls check_device before it. So call it from get_device_resources directly and simplify the code. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ac27b1d..c5102eb 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1432,35 +1432,24 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) * If the device is not yet associated with a domain this is also done * in this function. */ -static int get_device_resources(struct device *dev, - struct amd_iommu **iommu, - struct protection_domain **domain, - u16 *bdf) +static bool get_device_resources(struct device *dev, + struct amd_iommu **iommu, + struct protection_domain **domain, + u16 *bdf) { struct dma_ops_domain *dma_dom; struct pci_dev *pcidev; u16 _bdf; - *iommu = NULL; - *domain = NULL; - *bdf = 0xffff; - - if (dev->bus != &pci_bus_type) - return 0; - - pcidev = to_pci_dev(dev); - _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - - /* device not translated by any IOMMU in the system? */ - if (_bdf > amd_iommu_last_bdf) - return 0; - - *bdf = amd_iommu_alias_table[_bdf]; + if (!check_device(dev)) + return false; - *iommu = amd_iommu_rlookup_table[*bdf]; - if (*iommu == NULL) - return 0; + pcidev = to_pci_dev(dev); + _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); + *bdf = amd_iommu_alias_table[_bdf]; + *iommu = amd_iommu_rlookup_table[*bdf]; *domain = domain_for_device(*bdf); + if (*domain == NULL) { dma_dom = find_protection_domain(*bdf); if (!dma_dom) @@ -1474,7 +1463,7 @@ static int get_device_resources(struct device *dev, if (domain_for_device(_bdf) == NULL) attach_device(*iommu, *domain, _bdf); - return 1; + return true; } static void update_device_table(struct protection_domain *domain) @@ -1797,17 +1786,12 @@ static dma_addr_t map_page(struct device *dev, struct page *page, INC_STATS_COUNTER(cnt_map_single); - if (!check_device(dev)) - return DMA_ERROR_CODE; - - dma_mask = *dev->dma_mask; - - get_device_resources(dev, &iommu, &domain, &devid); - - if (iommu == NULL || domain == NULL) + if (!get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ return (dma_addr_t)paddr; + dma_mask = *dev->dma_mask; + if (!dma_ops_domain(domain)) return DMA_ERROR_CODE; @@ -1838,8 +1822,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, INC_STATS_COUNTER(cnt_unmap_single); - if (!check_device(dev) || - !get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ return; @@ -1893,16 +1876,11 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); - if (!check_device(dev)) - return 0; + if (!get_device_resources(dev, &iommu, &domain, &devid)) + return map_sg_no_iommu(dev, sglist, nelems, dir); dma_mask = *dev->dma_mask; - get_device_resources(dev, &iommu, &domain, &devid); - - if (!iommu || !domain) - return map_sg_no_iommu(dev, sglist, nelems, dir); - if (!dma_ops_domain(domain)) return 0; @@ -1958,8 +1936,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_unmap_sg); - if (!check_device(dev) || - !get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &iommu, &domain, &devid)) return; if (!dma_ops_domain(domain)) @@ -1994,24 +1971,22 @@ static void *alloc_coherent(struct device *dev, size_t size, INC_STATS_COUNTER(cnt_alloc_coherent); - if (!check_device(dev)) - return NULL; + if (!get_device_resources(dev, &iommu, &domain, &devid)) { + virt_addr = (void *)__get_free_pages(flag, get_order(size)); + *dma_addr = __pa(virt_addr); + return virt_addr; + } - if (!get_device_resources(dev, &iommu, &domain, &devid)) - flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + dma_mask = dev->coherent_dma_mask; + flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + flag |= __GFP_ZERO; - flag |= __GFP_ZERO; virt_addr = (void *)__get_free_pages(flag, get_order(size)); if (!virt_addr) return NULL; paddr = virt_to_phys(virt_addr); - if (!iommu || !domain) { - *dma_addr = (dma_addr_t)paddr; - return virt_addr; - } - if (!dma_ops_domain(domain)) goto out_free; @@ -2054,12 +2029,7 @@ static void free_coherent(struct device *dev, size_t size, INC_STATS_COUNTER(cnt_free_coherent); - if (!check_device(dev)) - return; - - get_device_resources(dev, &iommu, &domain, &devid); - - if (!iommu || !domain) + if (!get_device_resources(dev, &iommu, &domain, &devid)) goto free_mem; if (!dma_ops_domain(domain)) -- cgit v0.10.2 From 680525e06ddccda8c51bdddf532cd5b7d950c411 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 18:44:42 +0100 Subject: x86/amd-iommu: Remove iommu parameter from dma_ops_domain_(un)map The parameter is unused in these function so remove it from the parameter list. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c5102eb..da3f9d8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1585,8 +1585,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, * This is the generic map function. It maps one 4kb page at paddr to * the given address in the DMA address space for the domain. */ -static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, - struct dma_ops_domain *dom, +static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, unsigned long address, phys_addr_t paddr, int direction) @@ -1620,8 +1619,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, /* * The generic unmapping function for on page in the DMA address space. */ -static void dma_ops_domain_unmap(struct amd_iommu *iommu, - struct dma_ops_domain *dom, +static void dma_ops_domain_unmap(struct dma_ops_domain *dom, unsigned long address) { struct aperture_range *aperture; @@ -1700,7 +1698,7 @@ retry: start = address; for (i = 0; i < pages; ++i) { - ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); + ret = dma_ops_domain_map(dma_dom, start, paddr, dir); if (ret == DMA_ERROR_CODE) goto out_unmap; @@ -1724,7 +1722,7 @@ out_unmap: for (--i; i >= 0; --i) { start -= PAGE_SIZE; - dma_ops_domain_unmap(iommu, dma_dom, start); + dma_ops_domain_unmap(dma_dom, start); } dma_ops_free_addresses(dma_dom, address, pages); @@ -1754,7 +1752,7 @@ static void __unmap_single(struct amd_iommu *iommu, start = dma_addr; for (i = 0; i < pages; ++i) { - dma_ops_domain_unmap(iommu, dma_dom, start); + dma_ops_domain_unmap(dma_dom, start); start += PAGE_SIZE; } -- cgit v0.10.2 From 576175c2503ae9b0f930ee9a6a0abaf7ef8956ad Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 19:08:46 +0100 Subject: x86/amd-iommu: Make alloc_new_range aware of multiple IOMMUs Since the assumption that an dma_ops domain is only bound to one IOMMU was given up we need to make alloc_new_range aware of it. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index da3f9d8..687f617 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -788,11 +788,11 @@ static u64 *fetch_pte(struct protection_domain *domain, * aperture in case of dma_ops domain allocation or address allocation * failure. */ -static int alloc_new_range(struct amd_iommu *iommu, - struct dma_ops_domain *dma_dom, +static int alloc_new_range(struct dma_ops_domain *dma_dom, bool populate, gfp_t gfp) { int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; + struct amd_iommu *iommu; int i; #ifdef CONFIG_IOMMU_STRESS @@ -832,14 +832,17 @@ static int alloc_new_range(struct amd_iommu *iommu, dma_dom->aperture_size += APERTURE_RANGE_SIZE; /* Intialize the exclusion range if necessary */ - if (iommu->exclusion_start && - iommu->exclusion_start >= dma_dom->aperture[index]->offset && - iommu->exclusion_start < dma_dom->aperture_size) { - unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = iommu_num_pages(iommu->exclusion_start, - iommu->exclusion_length, - PAGE_SIZE); - dma_ops_reserve_addresses(dma_dom, startpage, pages); + for_each_iommu(iommu) { + if (iommu->exclusion_start && + iommu->exclusion_start >= dma_dom->aperture[index]->offset + && iommu->exclusion_start < dma_dom->aperture_size) { + unsigned long startpage; + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length, + PAGE_SIZE); + startpage = iommu->exclusion_start >> PAGE_SHIFT; + dma_ops_reserve_addresses(dma_dom, startpage, pages); + } } /* @@ -1143,7 +1146,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) add_domain_to_list(&dma_dom->domain); - if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) + if (alloc_new_range(dma_dom, true, GFP_KERNEL)) goto free_dma_dom; /* @@ -1686,7 +1689,7 @@ retry: */ dma_dom->next_address = dma_dom->aperture_size; - if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) + if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) goto out; /* -- cgit v0.10.2 From cd8c82e875c27ee0d8b59fb76bc12aa9db6a70c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 19:33:56 +0100 Subject: x86/amd-iommu: Remove iommu parameter from __(un)map_single With the prior changes this parameter is not longer required. This patch removes it from the function and all callers. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 687f617..c04dcb7 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1653,7 +1653,6 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom, * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, - struct amd_iommu *iommu, struct dma_ops_domain *dma_dom, phys_addr_t paddr, size_t size, @@ -1737,8 +1736,7 @@ out_unmap: * Does the reverse of the __map_single function. Must be called with * the domain lock held too */ -static void __unmap_single(struct amd_iommu *iommu, - struct dma_ops_domain *dma_dom, +static void __unmap_single(struct dma_ops_domain *dma_dom, dma_addr_t dma_addr, size_t size, int dir) @@ -1797,7 +1795,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, return DMA_ERROR_CODE; spin_lock_irqsave(&domain->lock, flags); - addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, + addr = __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); if (addr == DMA_ERROR_CODE) goto out; @@ -1832,7 +1830,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, spin_lock_irqsave(&domain->lock, flags); - __unmap_single(iommu, domain->priv, dma_addr, size, dir); + __unmap_single(domain->priv, dma_addr, size, dir); iommu_flush_complete(domain); @@ -1890,7 +1888,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, s, nelems, i) { paddr = sg_phys(s); - s->dma_address = __map_single(dev, iommu, domain->priv, + s->dma_address = __map_single(dev, domain->priv, paddr, s->length, dir, false, dma_mask); @@ -1910,7 +1908,7 @@ out: unmap: for_each_sg(sglist, s, mapped_elems, i) { if (s->dma_address) - __unmap_single(iommu, domain->priv, s->dma_address, + __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } @@ -1946,7 +1944,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { - __unmap_single(iommu, domain->priv, s->dma_address, + __unmap_single(domain->priv, s->dma_address, s->dma_length, dir); s->dma_address = s->dma_length = 0; } @@ -1996,7 +1994,7 @@ static void *alloc_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); - *dma_addr = __map_single(dev, iommu, domain->priv, paddr, + *dma_addr = __map_single(dev, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); if (*dma_addr == DMA_ERROR_CODE) { @@ -2038,7 +2036,7 @@ static void free_coherent(struct device *dev, size_t size, spin_lock_irqsave(&domain->lock, flags); - __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); + __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); iommu_flush_complete(domain); -- cgit v0.10.2 From f3be07da531ceef1b51295e5becc9bc07670b671 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 19:43:14 +0100 Subject: x86/amd-iommu: Remove iommu specific handling from dma_ops path This patch finishes the removal of all iommu specific handling code in the dma_ops path. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c04dcb7..2cd5800 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1436,11 +1436,11 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) * in this function. */ static bool get_device_resources(struct device *dev, - struct amd_iommu **iommu, struct protection_domain **domain, u16 *bdf) { struct dma_ops_domain *dma_dom; + struct amd_iommu *iommu; struct pci_dev *pcidev; u16 _bdf; @@ -1450,21 +1450,21 @@ static bool get_device_resources(struct device *dev, pcidev = to_pci_dev(dev); _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); *bdf = amd_iommu_alias_table[_bdf]; - *iommu = amd_iommu_rlookup_table[*bdf]; + iommu = amd_iommu_rlookup_table[*bdf]; *domain = domain_for_device(*bdf); if (*domain == NULL) { dma_dom = find_protection_domain(*bdf); if (!dma_dom) - dma_dom = (*iommu)->default_dom; + dma_dom = iommu->default_dom; *domain = &dma_dom->domain; - attach_device(*iommu, *domain, *bdf); + attach_device(iommu, *domain, *bdf); DUMP_printk("Using protection domain %d for device %s\n", (*domain)->id, dev_name(dev)); } if (domain_for_device(_bdf) == NULL) - attach_device(*iommu, *domain, _bdf); + attach_device(iommu, *domain, _bdf); return true; } @@ -1776,7 +1776,6 @@ static dma_addr_t map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; dma_addr_t addr; @@ -1785,7 +1784,7 @@ static dma_addr_t map_page(struct device *dev, struct page *page, INC_STATS_COUNTER(cnt_map_single); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) /* device not handled by any AMD IOMMU */ return (dma_addr_t)paddr; @@ -1815,13 +1814,12 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; INC_STATS_COUNTER(cnt_unmap_single); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) /* device not handled by any AMD IOMMU */ return; @@ -1864,7 +1862,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; int i; @@ -1875,7 +1872,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) return map_sg_no_iommu(dev, sglist, nelems, dir); dma_mask = *dev->dma_mask; @@ -1927,7 +1924,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_attrs *attrs) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; struct scatterlist *s; u16 devid; @@ -1935,7 +1931,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_unmap_sg); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) return; if (!dma_ops_domain(domain)) @@ -1962,7 +1958,6 @@ static void *alloc_coherent(struct device *dev, size_t size, { unsigned long flags; void *virt_addr; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; phys_addr_t paddr; @@ -1970,7 +1965,7 @@ static void *alloc_coherent(struct device *dev, size_t size, INC_STATS_COUNTER(cnt_alloc_coherent); - if (!get_device_resources(dev, &iommu, &domain, &devid)) { + if (!get_device_resources(dev, &domain, &devid)) { virt_addr = (void *)__get_free_pages(flag, get_order(size)); *dma_addr = __pa(virt_addr); return virt_addr; @@ -2022,13 +2017,12 @@ static void free_coherent(struct device *dev, size_t size, void *virt_addr, dma_addr_t dma_addr) { unsigned long flags; - struct amd_iommu *iommu; struct protection_domain *domain; u16 devid; INC_STATS_COUNTER(cnt_free_coherent); - if (!get_device_resources(dev, &iommu, &domain, &devid)) + if (!get_device_resources(dev, &domain, &devid)) goto free_mem; if (!dma_ops_domain(domain)) -- cgit v0.10.2 From 15898bbcb48fc86c2baff156163df0941ecb6a15 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 15:39:42 +0100 Subject: x86/amd-iommu: Let domain_for_device handle aliases If there is no domain associated to a device yet and the device has an alias device which already has a domain, the original device needs to have the same domain as the alias device. This patch changes domain_for_device to handle this situation and directly assigns the alias device domain to the device in this situation. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2cd5800..75470ff 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -71,6 +71,19 @@ static u64 *fetch_pte(struct protection_domain *domain, unsigned long address, int map_size); static void update_domain(struct protection_domain *domain); +/**************************************************************************** + * + * Helper functions + * + ****************************************************************************/ + +static inline u16 get_device_id(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + return calc_devid(pdev->bus->number, pdev->devfn); +} + #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1174,26 +1187,13 @@ static bool dma_ops_domain(struct protection_domain *domain) return domain->flags & PD_DMA_OPS_MASK; } -/* - * Find out the protection domain structure for a given PCI device. This - * will give us the pointer to the page table root for example. - */ -static struct protection_domain *domain_for_device(u16 devid) -{ - struct protection_domain *dom; - unsigned long flags; - - read_lock_irqsave(&amd_iommu_devtable_lock, flags); - dom = amd_iommu_pd_table[devid]; - read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - - return dom; -} - static void set_dte_entry(u16 devid, struct protection_domain *domain) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; u64 pte_root = virt_to_phys(domain->pt_root); + BUG_ON(amd_iommu_pd_table[devid] != NULL); + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; @@ -1203,42 +1203,87 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); amd_iommu_pd_table[devid] = domain; + + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; + + /* Flush the changes DTE entry */ + iommu_queue_inv_dev_entry(iommu, devid); +} + +static void clear_dte_entry(u16 devid) +{ + struct protection_domain *domain = amd_iommu_pd_table[devid]; + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + BUG_ON(domain == NULL); + + /* remove domain from the lookup table */ + amd_iommu_pd_table[devid] = NULL; + + /* remove entry from the device table seen by the hardware */ + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[2] = 0; + + amd_iommu_apply_erratum_63(devid); + + /* decrease reference counters */ + domain->dev_iommu[iommu->index] -= 1; + domain->dev_cnt -= 1; + + iommu_queue_inv_dev_entry(iommu, devid); } /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void __attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static int __attach_device(struct device *dev, + struct protection_domain *domain) { + u16 devid = get_device_id(dev); + u16 alias = amd_iommu_alias_table[devid]; + /* lock domain */ spin_lock(&domain->lock); - /* update DTE entry */ - set_dte_entry(devid, domain); + /* Some sanity checks */ + if (amd_iommu_pd_table[alias] != NULL && + amd_iommu_pd_table[alias] != domain) + return -EBUSY; - /* Do reference counting */ - domain->dev_iommu[iommu->index] += 1; - domain->dev_cnt += 1; + if (amd_iommu_pd_table[devid] != NULL && + amd_iommu_pd_table[devid] != domain) + return -EBUSY; + + /* Do real assignment */ + if (alias != devid && + amd_iommu_pd_table[alias] == NULL) + set_dte_entry(alias, domain); + + if (amd_iommu_pd_table[devid] == NULL) + set_dte_entry(devid, domain); /* ready */ spin_unlock(&domain->lock); + + return 0; } /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static int attach_device(struct device *dev, + struct protection_domain *domain) { unsigned long flags; + int ret; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __attach_device(iommu, domain, devid); + ret = __attach_device(dev, domain); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); /* @@ -1246,62 +1291,70 @@ static void attach_device(struct amd_iommu *iommu, * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. */ - iommu_queue_inv_dev_entry(iommu, devid); iommu_flush_tlb_pde(domain); + + return ret; } /* * Removes a device from a protection domain (unlocked) */ -static void __detach_device(struct protection_domain *domain, u16 devid) +static void __detach_device(struct device *dev) { + u16 devid = get_device_id(dev); struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; BUG_ON(!iommu); - /* lock domain */ - spin_lock(&domain->lock); - - /* remove domain from the lookup table */ - amd_iommu_pd_table[devid] = NULL; - - /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; - amd_iommu_dev_table[devid].data[1] = 0; - amd_iommu_dev_table[devid].data[2] = 0; - - amd_iommu_apply_erratum_63(devid); - - /* decrease reference counters */ - domain->dev_iommu[iommu->index] -= 1; - domain->dev_cnt -= 1; - - /* ready */ - spin_unlock(&domain->lock); + clear_dte_entry(devid); /* * If we run in passthrough mode the device must be assigned to the * passthrough domain if it is detached from any other domain */ - if (iommu_pass_through) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - __attach_device(iommu, pt_domain, devid); - } + if (iommu_pass_through) + __attach_device(dev, pt_domain); } /* * Removes a device from a protection domain (with devtable_lock held) */ -static void detach_device(struct protection_domain *domain, u16 devid) +static void detach_device(struct device *dev) { unsigned long flags; /* lock device table */ write_lock_irqsave(&amd_iommu_devtable_lock, flags); - __detach_device(domain, devid); + __detach_device(dev); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } +/* + * Find out the protection domain structure for a given PCI device. This + * will give us the pointer to the page table root for example. + */ +static struct protection_domain *domain_for_device(struct device *dev) +{ + struct protection_domain *dom; + unsigned long flags; + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + + read_lock_irqsave(&amd_iommu_devtable_lock, flags); + dom = amd_iommu_pd_table[devid]; + if (dom == NULL && + amd_iommu_pd_table[alias] != NULL) { + __attach_device(dev, amd_iommu_pd_table[alias]); + dom = amd_iommu_pd_table[devid]; + } + + read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + return dom; +} + static int device_change_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -1322,7 +1375,7 @@ static int device_change_notifier(struct notifier_block *nb, if (iommu == NULL) goto out; - domain = domain_for_device(devid); + domain = domain_for_device(dev); if (domain && !dma_ops_domain(domain)) WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " @@ -1334,7 +1387,7 @@ static int device_change_notifier(struct notifier_block *nb, goto out; if (iommu_pass_through) break; - detach_device(domain, devid); + detach_device(dev); break; case BUS_NOTIFY_ADD_DEVICE: /* allocate a protection domain if a device is added */ @@ -1441,30 +1494,25 @@ static bool get_device_resources(struct device *dev, { struct dma_ops_domain *dma_dom; struct amd_iommu *iommu; - struct pci_dev *pcidev; - u16 _bdf; if (!check_device(dev)) return false; - pcidev = to_pci_dev(dev); - _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - *bdf = amd_iommu_alias_table[_bdf]; + *bdf = get_device_id(dev); + *domain = domain_for_device(dev); iommu = amd_iommu_rlookup_table[*bdf]; - *domain = domain_for_device(*bdf); - if (*domain == NULL) { - dma_dom = find_protection_domain(*bdf); - if (!dma_dom) - dma_dom = iommu->default_dom; - *domain = &dma_dom->domain; - attach_device(iommu, *domain, *bdf); - DUMP_printk("Using protection domain %d for device %s\n", - (*domain)->id, dev_name(dev)); - } + if (*domain != NULL) + return true; - if (domain_for_device(_bdf) == NULL) - attach_device(iommu, *domain, _bdf); + /* Device not bount yet - bind it */ + dma_dom = find_protection_domain(*bdf); + if (!dma_dom) + dma_dom = iommu->default_dom; + *domain = &dma_dom->domain; + attach_device(dev, *domain); + DUMP_printk("Using protection domain %d for device %s\n", + (*domain)->id, dev_name(dev)); return true; } @@ -2068,7 +2116,7 @@ static void prealloc_protection_domains(void) if (devid > amd_iommu_last_bdf) continue; devid = amd_iommu_alias_table[devid]; - if (domain_for_device(devid)) + if (domain_for_device(&dev->dev)) continue; iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -2079,9 +2127,7 @@ static void prealloc_protection_domains(void) init_unity_mappings_for_device(dma_dom, devid); dma_dom->target_dev = devid; - attach_device(iommu, &dma_dom->domain, devid); - if (__devid != devid) - attach_device(iommu, &dma_dom->domain, __devid); + attach_device(&dev->dev, &dma_dom->domain); list_add_tail(&dma_dom->list, &iommu_pd_list); } @@ -2174,7 +2220,7 @@ static void cleanup_domain(struct protection_domain *domain) for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) if (amd_iommu_pd_table[devid] == domain) - __detach_device(domain, devid); + clear_dte_entry(devid); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } @@ -2262,7 +2308,6 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { - struct protection_domain *domain = dom->priv; struct amd_iommu *iommu; struct pci_dev *pdev; u16 devid; @@ -2275,7 +2320,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, devid = calc_devid(pdev->bus->number, pdev->devfn); if (devid > 0) - detach_device(domain, devid); + detach_device(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -2292,6 +2337,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct protection_domain *old_domain; struct amd_iommu *iommu; struct pci_dev *pdev; + int ret; u16 devid; if (dev->bus != &pci_bus_type) @@ -2309,15 +2355,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, if (!iommu) return -EINVAL; - old_domain = domain_for_device(devid); + old_domain = amd_iommu_pd_table[devid]; if (old_domain) - detach_device(old_domain, devid); + detach_device(dev); - attach_device(iommu, domain, devid); + ret = attach_device(dev, domain); iommu_completion_wait(iommu); - return 0; + return ret; } static int amd_iommu_map_range(struct iommu_domain *dom, @@ -2414,8 +2460,9 @@ static struct iommu_ops amd_iommu_ops = { int __init amd_iommu_init_passthrough(void) { + struct amd_iommu *iommu; struct pci_dev *dev = NULL; - u16 devid, devid2; + u16 devid; /* allocate passthroug domain */ pt_domain = protection_domain_alloc(); @@ -2425,20 +2472,16 @@ int __init amd_iommu_init_passthrough(void) pt_domain->mode |= PAGE_MODE_NONE; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - struct amd_iommu *iommu; devid = calc_devid(dev->bus->number, dev->devfn); if (devid > amd_iommu_last_bdf) continue; - devid2 = amd_iommu_alias_table[devid]; - - iommu = amd_iommu_rlookup_table[devid2]; + iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; - __attach_device(iommu, pt_domain, devid); - __attach_device(iommu, pt_domain, devid2); + attach_device(&dev->dev, pt_domain); } pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); -- cgit v0.10.2 From 94f6d190eeed91cb2bb901aa7816edd1e2405347 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 16:40:02 +0100 Subject: x86/amd-iommu: Simplify get_device_resources() With the previous changes the get_device_resources function can be simplified even more. The only important information for the callers is the protection domain. This patch renames the function to get_domain() and let it only return the protection domain for a device. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 75470ff..e5bbe9a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1463,6 +1463,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) { struct dma_ops_domain *entry, *ret = NULL; unsigned long flags; + u16 alias = amd_iommu_alias_table[devid]; if (list_empty(&iommu_pd_list)) return NULL; @@ -1470,7 +1471,8 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) spin_lock_irqsave(&iommu_pd_list_lock, flags); list_for_each_entry(entry, &iommu_pd_list, list) { - if (entry->target_dev == devid) { + if (entry->target_dev == devid || + entry->target_dev == alias) { ret = entry; break; } @@ -1488,33 +1490,31 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) * If the device is not yet associated with a domain this is also done * in this function. */ -static bool get_device_resources(struct device *dev, - struct protection_domain **domain, - u16 *bdf) +static struct protection_domain *get_domain(struct device *dev) { + struct protection_domain *domain; struct dma_ops_domain *dma_dom; - struct amd_iommu *iommu; + u16 devid = get_device_id(dev); if (!check_device(dev)) - return false; + return ERR_PTR(-EINVAL); - *bdf = get_device_id(dev); - *domain = domain_for_device(dev); - iommu = amd_iommu_rlookup_table[*bdf]; + domain = domain_for_device(dev); + if (domain != NULL && !dma_ops_domain(domain)) + return ERR_PTR(-EBUSY); - if (*domain != NULL) - return true; + if (domain != NULL) + return domain; /* Device not bount yet - bind it */ - dma_dom = find_protection_domain(*bdf); + dma_dom = find_protection_domain(devid); if (!dma_dom) - dma_dom = iommu->default_dom; - *domain = &dma_dom->domain; - attach_device(dev, *domain); + dma_dom = amd_iommu_rlookup_table[devid]->default_dom; + attach_device(dev, &dma_dom->domain); DUMP_printk("Using protection domain %d for device %s\n", - (*domain)->id, dev_name(dev)); + dma_dom->domain.id, dev_name(dev)); - return true; + return &dma_dom->domain; } static void update_device_table(struct protection_domain *domain) @@ -1825,23 +1825,22 @@ static dma_addr_t map_page(struct device *dev, struct page *page, { unsigned long flags; struct protection_domain *domain; - u16 devid; dma_addr_t addr; u64 dma_mask; phys_addr_t paddr = page_to_phys(page) + offset; INC_STATS_COUNTER(cnt_map_single); - if (!get_device_resources(dev, &domain, &devid)) - /* device not handled by any AMD IOMMU */ + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) return (dma_addr_t)paddr; + else if (IS_ERR(domain)) + return DMA_ERROR_CODE; dma_mask = *dev->dma_mask; - if (!dma_ops_domain(domain)) - return DMA_ERROR_CODE; - spin_lock_irqsave(&domain->lock, flags); + addr = __map_single(dev, domain->priv, paddr, size, dir, false, dma_mask); if (addr == DMA_ERROR_CODE) @@ -1863,15 +1862,11 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, { unsigned long flags; struct protection_domain *domain; - u16 devid; INC_STATS_COUNTER(cnt_unmap_single); - if (!get_device_resources(dev, &domain, &devid)) - /* device not handled by any AMD IOMMU */ - return; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); @@ -1911,7 +1906,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, { unsigned long flags; struct protection_domain *domain; - u16 devid; int i; struct scatterlist *s; phys_addr_t paddr; @@ -1920,14 +1914,14 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, INC_STATS_COUNTER(cnt_map_sg); - if (!get_device_resources(dev, &domain, &devid)) + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) return map_sg_no_iommu(dev, sglist, nelems, dir); + else if (IS_ERR(domain)) + return 0; dma_mask = *dev->dma_mask; - if (!dma_ops_domain(domain)) - return 0; - spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { @@ -1974,15 +1968,12 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, unsigned long flags; struct protection_domain *domain; struct scatterlist *s; - u16 devid; int i; INC_STATS_COUNTER(cnt_unmap_sg); - if (!get_device_resources(dev, &domain, &devid)) - return; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) return; spin_lock_irqsave(&domain->lock, flags); @@ -2007,17 +1998,18 @@ static void *alloc_coherent(struct device *dev, size_t size, unsigned long flags; void *virt_addr; struct protection_domain *domain; - u16 devid; phys_addr_t paddr; u64 dma_mask = dev->coherent_dma_mask; INC_STATS_COUNTER(cnt_alloc_coherent); - if (!get_device_resources(dev, &domain, &devid)) { + domain = get_domain(dev); + if (PTR_ERR(domain) == -EINVAL) { virt_addr = (void *)__get_free_pages(flag, get_order(size)); *dma_addr = __pa(virt_addr); return virt_addr; - } + } else if (IS_ERR(domain)) + return NULL; dma_mask = dev->coherent_dma_mask; flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -2029,9 +2021,6 @@ static void *alloc_coherent(struct device *dev, size_t size, paddr = virt_to_phys(virt_addr); - if (!dma_ops_domain(domain)) - goto out_free; - if (!dma_mask) dma_mask = *dev->dma_mask; @@ -2066,14 +2055,11 @@ static void free_coherent(struct device *dev, size_t size, { unsigned long flags; struct protection_domain *domain; - u16 devid; INC_STATS_COUNTER(cnt_free_coherent); - if (!get_device_resources(dev, &domain, &devid)) - goto free_mem; - - if (!dma_ops_domain(domain)) + domain = get_domain(dev); + if (IS_ERR(domain)) goto free_mem; spin_lock_irqsave(&domain->lock, flags); -- cgit v0.10.2 From 71c70984e5afc20d304fbb523f1c8bb42c4ceb36 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 16:43:06 +0100 Subject: x86/amd-iommu: Move find_protection_domain to helper functions This is a helper function and when its placed in the helper function section we can remove its forward declaration. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e5bbe9a..405f8da 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -59,7 +59,6 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); -static struct dma_ops_domain *find_protection_domain(u16 devid); static u64 *alloc_pte(struct protection_domain *domain, unsigned long address, int end_lvl, u64 **pte_page, gfp_t gfp); @@ -84,6 +83,34 @@ static inline u16 get_device_id(struct device *dev) return calc_devid(pdev->bus->number, pdev->devfn); } +/* + * In this function the list of preallocated protection domains is traversed to + * find the domain for a specific device + */ +static struct dma_ops_domain *find_protection_domain(u16 devid) +{ + struct dma_ops_domain *entry, *ret = NULL; + unsigned long flags; + u16 alias = amd_iommu_alias_table[devid]; + + if (list_empty(&iommu_pd_list)) + return NULL; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + + list_for_each_entry(entry, &iommu_pd_list, list) { + if (entry->target_dev == devid || + entry->target_dev == alias) { + ret = entry; + break; + } + } + + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + + return ret; +} + #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1456,34 +1483,6 @@ static bool check_device(struct device *dev) } /* - * In this function the list of preallocated protection domains is traversed to - * find the domain for a specific device - */ -static struct dma_ops_domain *find_protection_domain(u16 devid) -{ - struct dma_ops_domain *entry, *ret = NULL; - unsigned long flags; - u16 alias = amd_iommu_alias_table[devid]; - - if (list_empty(&iommu_pd_list)) - return NULL; - - spin_lock_irqsave(&iommu_pd_list_lock, flags); - - list_for_each_entry(entry, &iommu_pd_list, list) { - if (entry->target_dev == devid || - entry->target_dev == alias) { - ret = entry; - break; - } - } - - spin_unlock_irqrestore(&iommu_pd_list_lock, flags); - - return ret; -} - -/* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the * requestor id for a given device. -- cgit v0.10.2 From 98fc5a693bbdda498a556654c70d1e31a186c988 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 17:19:23 +0100 Subject: x86/amd-iommu: Use get_device_id and check_device where appropriate The logic of these two functions is reimplemented (at least in parts) in places in the code. This patch removes these code duplications and uses the functions instead. As a side effect it moves check_device() to the helper function code section. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 405f8da..d10195b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -111,6 +111,33 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) return ret; } +/* + * This function checks if the driver got a valid device from the caller to + * avoid dereferencing invalid pointers. + */ +static bool check_device(struct device *dev) +{ + u16 devid; + + if (!dev || !dev->dma_mask) + return false; + + /* No device or no PCI device */ + if (!dev || dev->bus != &pci_bus_type) + return false; + + devid = get_device_id(dev); + + /* Out of our scope? */ + if (devid > amd_iommu_last_bdf) + return false; + + if (amd_iommu_rlookup_table[devid] == NULL) + return false; + + return true; +} + #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1386,22 +1413,17 @@ static int device_change_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev = to_pci_dev(dev); - u16 devid = calc_devid(pdev->bus->number, pdev->devfn); + u16 devid; struct protection_domain *domain; struct dma_ops_domain *dma_domain; struct amd_iommu *iommu; unsigned long flags; - if (devid > amd_iommu_last_bdf) - goto out; - - devid = amd_iommu_alias_table[devid]; - - iommu = amd_iommu_rlookup_table[devid]; - if (iommu == NULL) - goto out; + if (!check_device(dev)) + return 0; + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; domain = domain_for_device(dev); if (domain && !dma_ops_domain(domain)) @@ -1453,36 +1475,6 @@ static struct notifier_block device_nb = { *****************************************************************************/ /* - * This function checks if the driver got a valid device from the caller to - * avoid dereferencing invalid pointers. - */ -static bool check_device(struct device *dev) -{ - u16 bdf; - struct pci_dev *pcidev; - - if (!dev || !dev->dma_mask) - return false; - - /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) - return false; - - pcidev = to_pci_dev(dev); - - bdf = calc_devid(pcidev->bus->number, pcidev->devfn); - - /* Out of our scope? */ - if (bdf > amd_iommu_last_bdf) - return false; - - if (amd_iommu_rlookup_table[bdf] == NULL) - return false; - - return true; -} - -/* * In the dma_ops path we only have the struct device. This function * finds the corresponding IOMMU, the protection domain and the * requestor id for a given device. @@ -2094,15 +2086,20 @@ static void prealloc_protection_domains(void) struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; struct amd_iommu *iommu; - u16 devid, __devid; + u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - __devid = devid = calc_devid(dev->bus->number, dev->devfn); - if (devid > amd_iommu_last_bdf) + + /* Do we handle this device? */ + if (!check_device(&dev->dev)) continue; - devid = amd_iommu_alias_table[devid]; + + /* Is there already any domain for it? */ if (domain_for_device(&dev->dev)) continue; + + devid = get_device_id(&dev->dev); + iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; @@ -2294,17 +2291,14 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { struct amd_iommu *iommu; - struct pci_dev *pdev; u16 devid; - if (dev->bus != &pci_bus_type) + if (!check_device(dev)) return; - pdev = to_pci_dev(dev); - - devid = calc_devid(pdev->bus->number, pdev->devfn); + devid = get_device_id(dev); - if (devid > 0) + if (amd_iommu_pd_table[devid] != NULL) detach_device(dev); iommu = amd_iommu_rlookup_table[devid]; @@ -2321,20 +2315,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct protection_domain *domain = dom->priv; struct protection_domain *old_domain; struct amd_iommu *iommu; - struct pci_dev *pdev; int ret; u16 devid; - if (dev->bus != &pci_bus_type) + if (!check_device(dev)) return -EINVAL; - pdev = to_pci_dev(dev); - - devid = calc_devid(pdev->bus->number, pdev->devfn); - - if (devid >= amd_iommu_last_bdf || - devid != amd_iommu_alias_table[devid]) - return -EINVAL; + devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -2458,10 +2445,11 @@ int __init amd_iommu_init_passthrough(void) while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - devid = calc_devid(dev->bus->number, dev->devfn); - if (devid > amd_iommu_last_bdf) + if (!check_device(&dev->dev)) continue; + devid = get_device_id(&dev->dev); + iommu = amd_iommu_rlookup_table[devid]; if (!iommu) continue; -- cgit v0.10.2 From 87a64d523825351a23743e69949c2a8c2077cecf Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 17:26:43 +0100 Subject: x86/amd-iommu: Remove iommu parameter from dma_ops_domain_alloc This function doesn't use the parameter anymore so it can be removed. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d10195b..17e83ec 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1188,7 +1188,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) * It also intializes the page table and the address allocator data * structures required for the dma_ops interface */ -static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) +static struct dma_ops_domain *dma_ops_domain_alloc(void) { struct dma_ops_domain *dma_dom; @@ -1443,7 +1443,7 @@ static int device_change_notifier(struct notifier_block *nb, dma_domain = find_protection_domain(devid); if (dma_domain) goto out; - dma_domain = dma_ops_domain_alloc(iommu); + dma_domain = dma_ops_domain_alloc(); if (!dma_domain) goto out; dma_domain->target_dev = devid; @@ -2085,7 +2085,6 @@ static void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; - struct amd_iommu *iommu; u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { @@ -2100,10 +2099,7 @@ static void prealloc_protection_domains(void) devid = get_device_id(&dev->dev); - iommu = amd_iommu_rlookup_table[devid]; - if (!iommu) - continue; - dma_dom = dma_ops_domain_alloc(iommu); + dma_dom = dma_ops_domain_alloc(); if (!dma_dom) continue; init_unity_mappings_for_device(dma_dom, devid); @@ -2139,7 +2135,7 @@ int __init amd_iommu_init_dma_ops(void) * protection domain will be assigned to the default one. */ for_each_iommu(iommu) { - iommu->default_dom = dma_ops_domain_alloc(iommu); + iommu->default_dom = dma_ops_domain_alloc(); if (iommu->default_dom == NULL) return -ENOMEM; iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; -- cgit v0.10.2 From 308973d3b958b9328a1051642c81ee6dbc5021a4 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 17:43:32 +0100 Subject: x86/amd-iommu: Move some pte allocation functions in the right section This patch moves alloc_pte() and fetch_pte() into the page table handling code section so that the forward declarations for them could be removed. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 17e83ec..90b3650 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -59,15 +59,10 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, int end_lvl, - u64 **pte_page, gfp_t gfp); static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages); static void reset_iommu_command_buffer(struct amd_iommu *iommu); -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, int map_size); static void update_domain(struct protection_domain *domain); /**************************************************************************** @@ -665,6 +660,100 @@ void amd_iommu_flush_all_devices(void) ****************************************************************************/ /* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + gfp_t gfp) +{ + u64 *pte; + + if (domain->mode == PAGE_MODE_6_LEVEL) + /* address space already 64 bit large */ + return false; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + *pte = PM_LEVEL_PDE(domain->mode, + virt_to_phys(domain->pt_root)); + domain->pt_root = pte; + domain->mode += 1; + domain->updated = true; + + return true; +} + +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + int end_lvl, + u64 **pte_page, + gfp_t gfp) +{ + u64 *pte, *page; + int level; + + while (address > PM_LEVEL_SIZE(domain->mode)) + increase_address_space(domain, gfp); + + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + + while (level > end_lvl) { + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + } + + level -= 1; + + pte = IOMMU_PTE_PAGE(*pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, int map_size) +{ + int level; + u64 *pte; + + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + + while (level > map_size) { + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + level -= 1; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; + + if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { + pte = NULL; + break; + } + } + + return pte; +} + +/* * Generic mapping functions. It maps a physical address into a DMA * address space. It allocates the page table pages if necessary. * In the future it can be extended to a generic mapping function @@ -820,37 +909,6 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, */ /* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. - */ -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, int map_size) -{ - int level; - u64 *pte; - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > map_size) { - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - level -= 1; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[PM_LEVEL_INDEX(level, address)]; - - if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { - pte = NULL; - break; - } - } - - return pte; -} - -/* * This function is used to add a new aperture range to an existing * aperture in case of dma_ops domain allocation or address allocation * failure. @@ -1535,69 +1593,6 @@ static void update_domain(struct protection_domain *domain) } /* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, - gfp_t gfp) -{ - u64 *pte; - - if (domain->mode == PAGE_MODE_6_LEVEL) - /* address space already 64 bit large */ - return false; - - pte = (void *)get_zeroed_page(gfp); - if (!pte) - return false; - - *pte = PM_LEVEL_PDE(domain->mode, - virt_to_phys(domain->pt_root)); - domain->pt_root = pte; - domain->mode += 1; - domain->updated = true; - - return true; -} - -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, - int end_lvl, - u64 **pte_page, - gfp_t gfp) -{ - u64 *pte, *page; - int level; - - while (address > PM_LEVEL_SIZE(domain->mode)) - increase_address_space(domain, gfp); - - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - - while (level > end_lvl) { - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); - } - - level -= 1; - - pte = IOMMU_PTE_PAGE(*pte); - - if (pte_page && level == end_lvl) - *pte_page = pte; - - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - -/* * This function fetches the PTE for a given address in the aperture */ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, -- cgit v0.10.2 From 171e7b3739e175eea7b32eca9dbe189589e14a28 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 24 Nov 2009 17:47:56 +0100 Subject: x86/amd-iommu: Rearrange dma_ops related functions This patch rearranges two dma_ops related functions so that their forward declarations are not longer necessary. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 90b3650..14b60c0 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -57,11 +57,6 @@ struct iommu_cmd { u32 data[4]; }; -static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, - struct unity_map_entry *e); -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages); static void reset_iommu_command_buffer(struct amd_iommu *iommu); static void update_domain(struct protection_domain *domain); @@ -823,28 +818,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, } /* - * Init the unity mappings for a specific IOMMU in the system - * - * Basically iterates over all unity mapping entries and applies them to - * the default domain DMA of that IOMMU if necessary. - */ -static int iommu_init_unity_mappings(struct amd_iommu *iommu) -{ - struct unity_map_entry *entry; - int ret; - - list_for_each_entry(entry, &amd_iommu_unity_map, list) { - if (!iommu_for_unity_map(iommu, entry)) - continue; - ret = dma_ops_unity_map(iommu->default_dom, entry); - if (ret) - return ret; - } - - return 0; -} - -/* * This function actually applies the mapping to the page table of the * dma_ops domain. */ @@ -873,6 +846,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, } /* + * Init the unity mappings for a specific IOMMU in the system + * + * Basically iterates over all unity mapping entries and applies them to + * the default domain DMA of that IOMMU if necessary. + */ +static int iommu_init_unity_mappings(struct amd_iommu *iommu) +{ + struct unity_map_entry *entry; + int ret; + + list_for_each_entry(entry, &amd_iommu_unity_map, list) { + if (!iommu_for_unity_map(iommu, entry)) + continue; + ret = dma_ops_unity_map(iommu->default_dom, entry); + if (ret) + return ret; + } + + return 0; +} + +/* * Inits the unity mappings required for a specific device */ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, @@ -909,6 +904,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, */ /* + * Used to reserve address ranges in the aperture (e.g. for exclusion + * ranges. + */ +static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, + unsigned long start_page, + unsigned int pages) +{ + unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; + + if (start_page + pages > last_page) + pages = last_page - start_page; + + for (i = start_page; i < start_page + pages; ++i) { + int index = i / APERTURE_RANGE_PAGES; + int page = i % APERTURE_RANGE_PAGES; + __set_bit(page, dom->aperture[index]->bitmap); + } +} + +/* * This function is used to add a new aperture range to an existing * aperture in case of dma_ops domain allocation or address allocation * failure. @@ -1166,26 +1181,6 @@ static void domain_id_free(int id) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -/* - * Used to reserve address ranges in the aperture (e.g. for exclusion - * ranges. - */ -static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, - unsigned long start_page, - unsigned int pages) -{ - unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; - - if (start_page + pages > last_page) - pages = last_page - start_page; - - for (i = start_page; i < start_page + pages; ++i) { - int index = i / APERTURE_RANGE_PAGES; - int page = i % APERTURE_RANGE_PAGES; - __set_bit(page, dom->aperture[index]->bitmap); - } -} - static void free_pagetable(struct protection_domain *domain) { int i, j; -- cgit v0.10.2 From 8793abeb783c12cc37f92f6133fd6468152b98df Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 27 Nov 2009 11:40:33 +0100 Subject: x86/amd-iommu: Remove support for domain sharing This patch makes device isolation mandatory and removes support for the amd_iommu=share option. This simplifies the code in several places. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 4899f783..02b6a0f 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -451,9 +451,6 @@ extern struct protection_domain **amd_iommu_pd_table; /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -/* will be 1 if device isolation is enabled */ -extern bool amd_iommu_isolate; - /* * If true, the addresses will be flushed on unmap time, not when * they are reused diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 14b60c0..ed58a16 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -148,7 +148,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem); DECLARE_STATS_COUNTER(total_map_requests); static struct dentry *stats_dir; -static struct dentry *de_isolate; static struct dentry *de_fflush; static void amd_iommu_stats_add(struct __iommu_counter *cnt) @@ -166,9 +165,6 @@ static void amd_iommu_stats_init(void) if (stats_dir == NULL) return; - de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, - (u32 *)&amd_iommu_isolate); - de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, (u32 *)&amd_iommu_unmap_flush); @@ -2135,11 +2131,9 @@ int __init amd_iommu_init_dma_ops(void) } /* - * If device isolation is enabled, pre-allocate the protection - * domains for each device. + * Pre-allocate the protection domains for each device. */ - if (amd_iommu_isolate) - prealloc_protection_domains(); + prealloc_protection_domains(); iommu_detected = 1; swiotlb = 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index fbe4c3c..fe1686f 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -125,13 +125,6 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have to handle */ LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ -#ifdef CONFIG_IOMMU_STRESS -bool amd_iommu_isolate = false; -#else -bool amd_iommu_isolate = true; /* if true, device isolation is - enabled */ -#endif - bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the @@ -1308,12 +1301,6 @@ static int __init amd_iommu_init(void) if (iommu_pass_through) goto out; - printk(KERN_INFO "AMD-Vi: device isolation "); - if (amd_iommu_isolate) - printk("enabled\n"); - else - printk("disabled\n"); - if (amd_iommu_unmap_flush) printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); else @@ -1387,10 +1374,6 @@ static int __init parse_amd_iommu_dump(char *str) static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { - if (strncmp(str, "isolate", 7) == 0) - amd_iommu_isolate = true; - if (strncmp(str, "share", 5) == 0) - amd_iommu_isolate = false; if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } -- cgit v0.10.2 From 657cbb6b6cba0f9c98c5299e0c803b2c0e67ea0a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 23 Nov 2009 15:26:46 +0100 Subject: x86/amd-iommu: Use dev->arch->iommu to store iommu related information This patch changes IOMMU code to use dev->archdata->iommu to store information about the alias device and the domain the device is attached to. This allows the driver to get rid of the amd_iommu_pd_table in the future. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 02b6a0f..9eaa27b 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -248,6 +248,14 @@ struct protection_domain { }; /* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct device *alias; /* The Alias Device */ + struct protection_domain *domain; /* Domain the device is bound to */ +}; + +/* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index cee34e9..029f230 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -8,7 +8,7 @@ struct dev_archdata { #ifdef CONFIG_X86_64 struct dma_map_ops *dma_ops; #endif -#ifdef CONFIG_DMAR +#if defined(CONFIG_DMAR) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif }; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ed58a16..3214e88 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -73,6 +73,11 @@ static inline u16 get_device_id(struct device *dev) return calc_devid(pdev->bus->number, pdev->devfn); } +static struct iommu_dev_data *get_dev_data(struct device *dev) +{ + return dev->archdata.iommu; +} + /* * In this function the list of preallocated protection domains is traversed to * find the domain for a specific device @@ -128,6 +133,35 @@ static bool check_device(struct device *dev) return true; } +static int iommu_init_device(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct pci_dev *pdev; + u16 devid, alias; + + if (dev->archdata.iommu) + return 0; + + dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); + if (!dev_data) + return -ENOMEM; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); + if (pdev) + dev_data->alias = &pdev->dev; + + dev->archdata.iommu = dev_data; + + + return 0; +} + +static void iommu_uninit_device(struct device *dev) +{ + kfree(dev->archdata.iommu); +} #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1346,28 +1380,39 @@ static void clear_dte_entry(u16 devid) static int __attach_device(struct device *dev, struct protection_domain *domain) { - u16 devid = get_device_id(dev); - u16 alias = amd_iommu_alias_table[devid]; + struct iommu_dev_data *dev_data, *alias_data; + u16 devid, alias; + + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + if (!alias_data) + return -EINVAL; /* lock domain */ spin_lock(&domain->lock); /* Some sanity checks */ - if (amd_iommu_pd_table[alias] != NULL && - amd_iommu_pd_table[alias] != domain) + if (alias_data->domain != NULL && + alias_data->domain != domain) return -EBUSY; - if (amd_iommu_pd_table[devid] != NULL && - amd_iommu_pd_table[devid] != domain) + if (dev_data->domain != NULL && + dev_data->domain != domain) return -EBUSY; /* Do real assignment */ if (alias != devid && - amd_iommu_pd_table[alias] == NULL) + alias_data->domain == NULL) { + alias_data->domain = domain; set_dte_entry(alias, domain); + } - if (amd_iommu_pd_table[devid] == NULL) + if (dev_data->domain == NULL) { + dev_data->domain = domain; set_dte_entry(devid, domain); + } /* ready */ spin_unlock(&domain->lock); @@ -1406,10 +1451,12 @@ static void __detach_device(struct device *dev) { u16 devid = get_device_id(dev); struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + struct iommu_dev_data *dev_data = get_dev_data(dev); BUG_ON(!iommu); clear_dte_entry(devid); + dev_data->domain = NULL; /* * If we run in passthrough mode the device must be assigned to the @@ -1439,18 +1486,23 @@ static void detach_device(struct device *dev) static struct protection_domain *domain_for_device(struct device *dev) { struct protection_domain *dom; + struct iommu_dev_data *dev_data, *alias_data; unsigned long flags; u16 devid, alias; - devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; + devid = get_device_id(dev); + alias = amd_iommu_alias_table[devid]; + dev_data = get_dev_data(dev); + alias_data = get_dev_data(dev_data->alias); + if (!alias_data) + return NULL; read_lock_irqsave(&amd_iommu_devtable_lock, flags); - dom = amd_iommu_pd_table[devid]; + dom = dev_data->domain; if (dom == NULL && - amd_iommu_pd_table[alias] != NULL) { - __attach_device(dev, amd_iommu_pd_table[alias]); - dom = amd_iommu_pd_table[devid]; + alias_data->domain != NULL) { + __attach_device(dev, alias_data->domain); + dom = alias_data->domain; } read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); @@ -1473,14 +1525,12 @@ static int device_change_notifier(struct notifier_block *nb, devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; - domain = domain_for_device(dev); - - if (domain && !dma_ops_domain(domain)) - WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " - "to a non-dma-ops domain\n", dev_name(dev)); switch (action) { case BUS_NOTIFY_UNBOUND_DRIVER: + + domain = domain_for_device(dev); + if (!domain) goto out; if (iommu_pass_through) @@ -1488,6 +1538,11 @@ static int device_change_notifier(struct notifier_block *nb, detach_device(dev); break; case BUS_NOTIFY_ADD_DEVICE: + + iommu_init_device(dev); + + domain = domain_for_device(dev); + /* allocate a protection domain if a device is added */ dma_domain = find_protection_domain(devid); if (dma_domain) @@ -1502,6 +1557,10 @@ static int device_change_notifier(struct notifier_block *nb, spin_unlock_irqrestore(&iommu_pd_list_lock, flags); break; + case BUS_NOTIFY_DEL_DEVICE: + + iommu_uninit_device(dev); + default: goto out; } @@ -2079,6 +2138,8 @@ static void prealloc_protection_domains(void) if (!check_device(&dev->dev)) continue; + iommu_init_device(&dev->dev); + /* Is there already any domain for it? */ if (domain_for_device(&dev->dev)) continue; @@ -2270,6 +2331,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { + struct iommu_dev_data *dev_data = dev->archdata.iommu; struct amd_iommu *iommu; u16 devid; @@ -2278,7 +2340,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, devid = get_device_id(dev); - if (amd_iommu_pd_table[devid] != NULL) + if (dev_data->domain != NULL) detach_device(dev); iommu = amd_iommu_rlookup_table[devid]; @@ -2293,7 +2355,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { struct protection_domain *domain = dom->priv; - struct protection_domain *old_domain; + struct iommu_dev_data *dev_data; struct amd_iommu *iommu; int ret; u16 devid; @@ -2301,14 +2363,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, if (!check_device(dev)) return -EINVAL; + dev_data = dev->archdata.iommu; + devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; if (!iommu) return -EINVAL; - old_domain = amd_iommu_pd_table[devid]; - if (old_domain) + if (dev_data->domain) detach_device(dev); ret = attach_device(dev, domain); -- cgit v0.10.2 From 241000556f751dacd332df6ab2e903a23746e51e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 25 Nov 2009 15:59:57 +0100 Subject: x86/amd-iommu: Add device bind reference counting This patch adds a reference count to each device to count how often the device was bound to that domain. This is important for single devices that act as an alias for a number of others. These devices must stay bound to their domains until all devices that alias to it are unbound from the same domain. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 9eaa27b..434e90e 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -253,6 +253,7 @@ struct protection_domain { struct iommu_dev_data { struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ + atomic_t bind; /* Domain attach reverent count */ }; /* diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3214e88..f5db7d5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -152,6 +152,8 @@ static int iommu_init_device(struct device *dev) if (pdev) dev_data->alias = &pdev->dev; + atomic_set(&dev_data->bind, 0); + dev->archdata.iommu = dev_data; @@ -1403,10 +1405,13 @@ static int __attach_device(struct device *dev, return -EBUSY; /* Do real assignment */ - if (alias != devid && - alias_data->domain == NULL) { - alias_data->domain = domain; - set_dte_entry(alias, domain); + if (alias != devid) { + if (alias_data->domain == NULL) { + alias_data->domain = domain; + set_dte_entry(alias, domain); + } + + atomic_inc(&alias_data->bind); } if (dev_data->domain == NULL) { @@ -1414,6 +1419,8 @@ static int __attach_device(struct device *dev, set_dte_entry(devid, domain); } + atomic_inc(&dev_data->bind); + /* ready */ spin_unlock(&domain->lock); @@ -1449,20 +1456,34 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct device *dev) { - u16 devid = get_device_id(dev); + u16 devid = get_device_id(dev), alias; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; struct iommu_dev_data *dev_data = get_dev_data(dev); + struct iommu_dev_data *alias_data; BUG_ON(!iommu); - clear_dte_entry(devid); - dev_data->domain = NULL; + devid = get_device_id(dev); + alias = get_device_id(dev_data->alias); + + if (devid != alias) { + alias_data = get_dev_data(dev_data->alias); + if (atomic_dec_and_test(&alias_data->bind)) { + clear_dte_entry(alias); + alias_data->domain = NULL; + } + } + + if (atomic_dec_and_test(&dev_data->bind)) { + clear_dte_entry(devid); + dev_data->domain = NULL; + } /* * If we run in passthrough mode the device must be assigned to the * passthrough domain if it is detached from any other domain */ - if (iommu_pass_through) + if (iommu_pass_through && dev_data->domain == NULL) __attach_device(dev, pt_domain); } -- cgit v0.10.2 From 7c392cbe984d904f7c89a6a75b2ac245254e8da5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 11:13:32 +0100 Subject: x86/amd-iommu: Keep devices per domain in a list This patch introduces a list to each protection domain which keeps all devices associated with the domain. This can be used later to optimize certain functions and to completly remove the amd_iommu_pd_table. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 434e90e..93953d1 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -235,6 +235,7 @@ extern bool amd_iommu_np_cache; */ struct protection_domain { struct list_head list; /* for list of all protection domains */ + struct list_head dev_list; /* List of all devices in this domain */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ @@ -251,6 +252,7 @@ struct protection_domain { * This struct contains device specific data for the IOMMU */ struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reverent count */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f5db7d5..530d608 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1286,6 +1286,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) dma_dom->domain.id = domain_id_alloc(); if (dma_dom->domain.id == 0) goto free_dma_dom; + INIT_LIST_HEAD(&dma_dom->domain.dev_list); dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; @@ -1408,6 +1409,7 @@ static int __attach_device(struct device *dev, if (alias != devid) { if (alias_data->domain == NULL) { alias_data->domain = domain; + list_add(&alias_data->list, &domain->dev_list); set_dte_entry(alias, domain); } @@ -1416,6 +1418,7 @@ static int __attach_device(struct device *dev, if (dev_data->domain == NULL) { dev_data->domain = domain; + list_add(&dev_data->list, &domain->dev_list); set_dte_entry(devid, domain); } @@ -1460,6 +1463,7 @@ static void __detach_device(struct device *dev) struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; struct iommu_dev_data *dev_data = get_dev_data(dev); struct iommu_dev_data *alias_data; + unsigned long flags; BUG_ON(!iommu); @@ -1469,13 +1473,19 @@ static void __detach_device(struct device *dev) if (devid != alias) { alias_data = get_dev_data(dev_data->alias); if (atomic_dec_and_test(&alias_data->bind)) { + spin_lock_irqsave(&alias_data->domain->lock, flags); clear_dte_entry(alias); + list_del(&alias_data->list); + spin_unlock_irqrestore(&alias_data->domain->lock, flags); alias_data->domain = NULL; } } if (atomic_dec_and_test(&dev_data->bind)) { + spin_lock_irqsave(&dev_data->domain->lock, flags); clear_dte_entry(devid); + list_del(&dev_data->list); + spin_unlock_irqrestore(&dev_data->domain->lock, flags); dev_data->domain = NULL; } @@ -2294,6 +2304,7 @@ static struct protection_domain *protection_domain_alloc(void) domain->id = domain_id_alloc(); if (!domain->id) goto out_err; + INIT_LIST_HEAD(&domain->dev_list); add_domain_to_list(domain); -- cgit v0.10.2 From 7f760ddd702d162d693bc79f62c3bdd7fe55bd9d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 14:49:59 +0100 Subject: x86/amd-iommu: Cleanup attach/detach_device code This patch cleans up the attach_device and detach_device paths and fixes reference counting while at it. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 530d608..e3363fd 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1329,7 +1329,6 @@ static bool dma_ops_domain(struct protection_domain *domain) static void set_dte_entry(u16 devid, struct protection_domain *domain) { - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; u64 pte_root = virt_to_phys(domain->pt_root); BUG_ON(amd_iommu_pd_table[devid] != NULL); @@ -1344,18 +1343,11 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) amd_iommu_pd_table[devid] = domain; - /* Do reference counting */ - domain->dev_iommu[iommu->index] += 1; - domain->dev_cnt += 1; - - /* Flush the changes DTE entry */ - iommu_queue_inv_dev_entry(iommu, devid); } static void clear_dte_entry(u16 devid) { struct protection_domain *domain = amd_iommu_pd_table[devid]; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; BUG_ON(domain == NULL); @@ -1368,11 +1360,51 @@ static void clear_dte_entry(u16 devid) amd_iommu_dev_table[devid].data[2] = 0; amd_iommu_apply_erratum_63(devid); +} + +static void do_attach(struct device *dev, struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); + + /* Update data structures */ + dev_data->domain = domain; + list_add(&dev_data->list, &domain->dev_list); + set_dte_entry(devid, domain); + + /* Do reference counting */ + domain->dev_iommu[iommu->index] += 1; + domain->dev_cnt += 1; + + /* Flush the DTE entry */ + iommu_queue_inv_dev_entry(iommu, devid); +} + +static void do_detach(struct device *dev) +{ + struct iommu_dev_data *dev_data; + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + dev_data = get_dev_data(dev); /* decrease reference counters */ - domain->dev_iommu[iommu->index] -= 1; - domain->dev_cnt -= 1; + dev_data->domain->dev_iommu[iommu->index] -= 1; + dev_data->domain->dev_cnt -= 1; + + /* Update data structures */ + dev_data->domain = NULL; + list_del(&dev_data->list); + clear_dte_entry(devid); + /* Flush the DTE entry */ iommu_queue_inv_dev_entry(iommu, devid); } @@ -1384,12 +1416,10 @@ static int __attach_device(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data, *alias_data; - u16 devid, alias; - devid = get_device_id(dev); - alias = amd_iommu_alias_table[devid]; dev_data = get_dev_data(dev); alias_data = get_dev_data(dev_data->alias); + if (!alias_data) return -EINVAL; @@ -1406,21 +1436,16 @@ static int __attach_device(struct device *dev, return -EBUSY; /* Do real assignment */ - if (alias != devid) { - if (alias_data->domain == NULL) { - alias_data->domain = domain; - list_add(&alias_data->list, &domain->dev_list); - set_dte_entry(alias, domain); - } + if (dev_data->alias != dev) { + alias_data = get_dev_data(dev_data->alias); + if (alias_data->domain == NULL) + do_attach(dev_data->alias, domain); atomic_inc(&alias_data->bind); } - if (dev_data->domain == NULL) { - dev_data->domain = domain; - list_add(&dev_data->list, &domain->dev_list); - set_dte_entry(devid, domain); - } + if (dev_data->domain == NULL) + do_attach(dev, domain); atomic_inc(&dev_data->bind); @@ -1459,35 +1484,24 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct device *dev) { - u16 devid = get_device_id(dev), alias; - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; struct iommu_dev_data *dev_data = get_dev_data(dev); struct iommu_dev_data *alias_data; unsigned long flags; - BUG_ON(!iommu); + BUG_ON(!dev_data->domain); - devid = get_device_id(dev); - alias = get_device_id(dev_data->alias); + spin_lock_irqsave(&dev_data->domain->lock, flags); - if (devid != alias) { + if (dev_data->alias != dev) { alias_data = get_dev_data(dev_data->alias); - if (atomic_dec_and_test(&alias_data->bind)) { - spin_lock_irqsave(&alias_data->domain->lock, flags); - clear_dte_entry(alias); - list_del(&alias_data->list); - spin_unlock_irqrestore(&alias_data->domain->lock, flags); - alias_data->domain = NULL; - } + if (atomic_dec_and_test(&alias_data->bind)) + do_detach(dev_data->alias); } - if (atomic_dec_and_test(&dev_data->bind)) { - spin_lock_irqsave(&dev_data->domain->lock, flags); - clear_dte_entry(devid); - list_del(&dev_data->list); - spin_unlock_irqrestore(&dev_data->domain->lock, flags); - dev_data->domain = NULL; - } + if (atomic_dec_and_test(&dev_data->bind)) + do_detach(dev); + + spin_unlock_irqrestore(&dev_data->domain->lock, flags); /* * If we run in passthrough mode the device must be assigned to the -- cgit v0.10.2 From 3fa43655d81d471d47c44b0db4e2be1f8af32207 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 15:04:38 +0100 Subject: x86/amd-iommu: Introduce iommu_flush_device() function This patch adds a function to flush a DTE entry for a given struct device and replaces iommu_queue_inv_dev_entry calls with this function where appropriate. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e3363fd..41c4ebe 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -494,6 +494,17 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) return ret; } +static int iommu_flush_device(struct device *dev) +{ + struct amd_iommu *iommu; + u16 devid; + + devid = get_device_id(dev); + iommu = amd_iommu_rlookup_table[devid]; + + return iommu_queue_inv_dev_entry(iommu, devid); +} + static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, u16 domid, int pde, int s) { @@ -1382,7 +1393,7 @@ static void do_attach(struct device *dev, struct protection_domain *domain) domain->dev_cnt += 1; /* Flush the DTE entry */ - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); } static void do_detach(struct device *dev) @@ -1405,7 +1416,7 @@ static void do_detach(struct device *dev) clear_dte_entry(devid); /* Flush the DTE entry */ - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); } /* @@ -1610,7 +1621,7 @@ static int device_change_notifier(struct notifier_block *nb, goto out; } - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); iommu_completion_wait(iommu); out: @@ -2393,7 +2404,7 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, if (!iommu) return; - iommu_queue_inv_dev_entry(iommu, devid); + iommu_flush_device(dev); iommu_completion_wait(iommu); } -- cgit v0.10.2 From b00d3bcff4d996f65e337d404b0df5dc201a01ab Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 15:35:33 +0100 Subject: x86/amd-iommu: Cleanup DTE flushing code This patch cleans up the code to flush device table entries in the IOMMU. With this chance the driver can get rid of the iommu_queue_inv_dev_entry() function. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 93953d1..f92d1b3 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -253,6 +253,7 @@ struct protection_domain { */ struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ + struct device *dev; /* Device this data belong to */ struct device *alias; /* The Alias Device */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reverent count */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 41c4ebe..0eafca5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -146,6 +146,8 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; + dev_data->dev = dev; + devid = get_device_id(dev); alias = amd_iommu_alias_table[devid]; pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); @@ -478,31 +480,21 @@ static void iommu_flush_complete(struct protection_domain *domain) /* * Command send function for invalidating a device table entry */ -static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) -{ - struct iommu_cmd cmd; - int ret; - - BUG_ON(iommu == NULL); - - memset(&cmd, 0, sizeof(cmd)); - CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); - cmd.data[0] = devid; - - ret = iommu_queue_command(iommu, &cmd); - - return ret; -} - static int iommu_flush_device(struct device *dev) { struct amd_iommu *iommu; + struct iommu_cmd cmd; u16 devid; devid = get_device_id(dev); iommu = amd_iommu_rlookup_table[devid]; - return iommu_queue_inv_dev_entry(iommu, devid); + /* Build command */ + memset(&cmd, 0, sizeof(cmd)); + CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); + cmd.data[0] = devid; + + return iommu_queue_command(iommu, &cmd); } static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, @@ -592,30 +584,43 @@ static void iommu_flush_tlb_pde(struct protection_domain *domain) __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } + /* - * This function flushes all domains that have devices on the given IOMMU + * This function flushes the DTEs for all devices in domain */ -static void flush_all_domains_on_iommu(struct amd_iommu *iommu) +static void iommu_flush_domain_devices(struct protection_domain *domain) +{ + struct iommu_dev_data *dev_data; + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + + list_for_each_entry(dev_data, &domain->dev_list, list) + iommu_flush_device(dev_data->dev); + + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void iommu_flush_all_domain_devices(void) { - u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; struct protection_domain *domain; unsigned long flags; spin_lock_irqsave(&amd_iommu_pd_lock, flags); list_for_each_entry(domain, &amd_iommu_pd_list, list) { - if (domain->dev_iommu[iommu->index] == 0) - continue; - - spin_lock(&domain->lock); - iommu_queue_inv_iommu_pages(iommu, address, domain->id, 1, 1); + iommu_flush_domain_devices(domain); iommu_flush_complete(domain); - spin_unlock(&domain->lock); } spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } +void amd_iommu_flush_all_devices(void) +{ + iommu_flush_all_domain_devices(); +} + /* * This function uses heavy locking and may disable irqs for some time. But * this is no issue because it is only called during resume. @@ -637,38 +642,6 @@ void amd_iommu_flush_all_domains(void) spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); } -static void flush_all_devices_for_iommu(struct amd_iommu *iommu) -{ - int i; - - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (iommu != amd_iommu_rlookup_table[i]) - continue; - - iommu_queue_inv_dev_entry(iommu, i); - iommu_completion_wait(iommu); - } -} - -static void flush_devices_by_domain(struct protection_domain *domain) -{ - struct amd_iommu *iommu; - int i; - - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || - (amd_iommu_pd_table[i] != domain)) - continue; - - iommu = amd_iommu_rlookup_table[i]; - if (!iommu) - continue; - - iommu_queue_inv_dev_entry(iommu, i); - iommu_completion_wait(iommu); - } -} - static void reset_iommu_command_buffer(struct amd_iommu *iommu) { pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); @@ -679,17 +652,12 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu) iommu->reset_in_progress = true; amd_iommu_reset_cmd_buffer(iommu); - flush_all_devices_for_iommu(iommu); - flush_all_domains_on_iommu(iommu); + amd_iommu_flush_all_devices(); + amd_iommu_flush_all_domains(); iommu->reset_in_progress = false; } -void amd_iommu_flush_all_devices(void) -{ - flush_devices_by_domain(NULL); -} - /**************************************************************************** * * The functions below are used the create the page table mappings for @@ -1692,7 +1660,7 @@ static void update_domain(struct protection_domain *domain) return; update_device_table(domain); - flush_devices_by_domain(domain); + iommu_flush_domain_devices(domain); iommu_flush_tlb_pde(domain); domain->updated = false; -- cgit v0.10.2 From 8eed9833346781dd15e3bef35a91b0a40787ea3c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 26 Nov 2009 15:45:41 +0100 Subject: x86/amd-iommu: Move reset_iommu_command_buffer out of locked code This patch removes the ugly contruct where the iommu->lock must be released while before calling the reset_iommu_command_buffer function. Signed-off-by: Joerg Roedel diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0eafca5..b75fcd9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -285,6 +285,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + iommu->reset_in_progress = true; reset_iommu_command_buffer(iommu); dump_command(address); break; @@ -407,11 +408,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (unlikely(i == EXIT_LOOP_COUNT)) { - spin_unlock(&iommu->lock); - reset_iommu_command_buffer(iommu); - spin_lock(&iommu->lock); - } + if (unlikely(i == EXIT_LOOP_COUNT)) + iommu->reset_in_progress = true; } /* @@ -458,6 +456,9 @@ static int iommu_completion_wait(struct amd_iommu *iommu) out: spin_unlock_irqrestore(&iommu->lock, flags); + if (iommu->reset_in_progress) + reset_iommu_command_buffer(iommu); + return 0; } @@ -649,8 +650,6 @@ static void reset_iommu_command_buffer(struct amd_iommu *iommu) if (iommu->reset_in_progress) panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); - iommu->reset_in_progress = true; - amd_iommu_reset_cmd_buffer(iommu); amd_iommu_flush_all_devices(); amd_iommu_flush_all_domains(); -- cgit v0.10.2 From 492667dacc0ac9763969155482b1261b34ccf450 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 27 Nov 2009 13:25:47 +0100 Subject: x86/amd-iommu: Remove amd_iommu_pd_table The data that was stored in this table is now available in dev->archdata.iommu. So this table is not longer necessary. This patch removes the remaining uses of that variable and removes it from the code. Signed-off-by: Joerg Roedel diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index f92d1b3..ba19ad4 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -457,9 +457,6 @@ extern unsigned amd_iommu_aperture_order; /* largest PCI device id we expect translation requests for */ extern u16 amd_iommu_last_bdf; -/* data structures for protection domain handling */ -extern struct protection_domain **amd_iommu_pd_table; - /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b75fcd9..32fb091 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1309,8 +1309,6 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) { u64 pte_root = virt_to_phys(domain->pt_root); - BUG_ON(amd_iommu_pd_table[devid] != NULL); - pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; @@ -1318,20 +1316,10 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); - - amd_iommu_pd_table[devid] = domain; - } static void clear_dte_entry(u16 devid) { - struct protection_domain *domain = amd_iommu_pd_table[devid]; - - BUG_ON(domain == NULL); - - /* remove domain from the lookup table */ - amd_iommu_pd_table[devid] = NULL; - /* remove entry from the device table seen by the hardware */ amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; amd_iommu_dev_table[devid].data[1] = 0; @@ -1641,15 +1629,11 @@ static struct protection_domain *get_domain(struct device *dev) static void update_device_table(struct protection_domain *domain) { - unsigned long flags; - int i; + struct iommu_dev_data *dev_data; - for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (amd_iommu_pd_table[i] != domain) - continue; - write_lock_irqsave(&amd_iommu_devtable_lock, flags); - set_dte_entry(i, domain); - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + list_for_each_entry(dev_data, &domain->dev_list, list) { + u16 devid = get_device_id(dev_data->dev); + set_dte_entry(devid, domain); } } @@ -2259,14 +2243,17 @@ free_domains: static void cleanup_domain(struct protection_domain *domain) { + struct iommu_dev_data *dev_data, *next; unsigned long flags; - u16 devid; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) - if (amd_iommu_pd_table[devid] == domain) - clear_dte_entry(devid); + list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { + struct device *dev = dev_data->dev; + + do_detach(dev); + atomic_set(&dev_data->bind, 0); + } write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index fe1686f..7ffc399 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -165,12 +165,6 @@ u16 *amd_iommu_alias_table; struct amd_iommu **amd_iommu_rlookup_table; /* - * The pd table (protection domain table) is used to find the protection domain - * data structure a device belongs to. Indexed with the PCI device id too. - */ -struct protection_domain **amd_iommu_pd_table; - -/* * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap * to know which ones are already in use. */ @@ -1238,15 +1232,6 @@ static int __init amd_iommu_init(void) if (amd_iommu_rlookup_table == NULL) goto free; - /* - * Protection Domain table - maps devices to protection domains - * This table has the same size as the rlookup_table - */ - amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(rlookup_table_size)); - if (amd_iommu_pd_table == NULL) - goto free; - amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, get_order(MAX_DOMAIN_ID/8)); @@ -1314,9 +1299,6 @@ free: free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); - free_pages((unsigned long)amd_iommu_pd_table, - get_order(rlookup_table_size)); - free_pages((unsigned long)amd_iommu_rlookup_table, get_order(rlookup_table_size)); -- cgit v0.10.2 From 3f5ee186f615a720fe78eb33662ae4da57a1eee3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:12 -0200 Subject: perf symbols: Avoid annoying message about loading symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should be properly fixed when we remove the XXX comment in 'perf report', function resolve_symbol. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-1-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 4ed379b..8aab89b 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1516,6 +1516,7 @@ static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) vdso = dso__new("[vdso]"); if (vdso == NULL) goto out_delete_kernel_map; + vdso->loaded = 1; if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, sizeof(kernel->build_id)) == 0) -- cgit v0.10.2 From 61f37a824d6782503ff66bf653f2e07902b641a1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:13 -0200 Subject: perf symbols: Rename kernel_mapto kernel_map[s]__functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we'll have kernel_map[s]__variables too. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-2-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 18ac5ea..377cb7c 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -184,7 +184,7 @@ got_map: * trick of looking in the whole kernel symbol list. */ if ((long long)ip < 0) { - map = kernel_map; + map = kernel_map__functions; goto got_map; } } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8aab89b..687fb7f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -43,7 +43,7 @@ static struct symbol_conf symbol_conf__defaults = { .try_vmlinux_path = true, }; -static struct rb_root kernel_maps; +static struct rb_root kernel_maps__functions; static void symbols__fixup_end(struct rb_root *self) { @@ -71,7 +71,7 @@ static void symbols__fixup_end(struct rb_root *self) static void kernel_maps__fixup_end(void) { struct map *prev, *curr; - struct rb_node *nd, *prevnd = rb_first(&kernel_maps); + struct rb_node *nd, *prevnd = rb_first(&kernel_maps__functions); if (prevnd == NULL) return; @@ -325,7 +325,7 @@ static int kernel_maps__load_all_kallsyms(void) * kernel_maps__split_kallsyms, when we have split the * maps per module */ - symbols__insert(&kernel_map->dso->functions, sym); + symbols__insert(&kernel_map__functions->dso->functions, sym); } free(line); @@ -346,10 +346,10 @@ out_failure: */ static int kernel_maps__split_kallsyms(symbol_filter_t filter) { - struct map *map = kernel_map; + struct map *map = kernel_map__functions; struct symbol *pos; int count = 0; - struct rb_node *next = rb_first(&kernel_map->dso->functions); + struct rb_node *next = rb_first(&kernel_map__functions->dso->functions); int kernel_range = 0; while (next) { @@ -376,7 +376,7 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) */ pos->start = map->map_ip(map, pos->start); pos->end = map->map_ip(map, pos->end); - } else if (map != kernel_map) { + } else if (map != kernel_map__functions) { char dso_name[PATH_MAX]; struct dso *dso; @@ -399,12 +399,12 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) } if (filter && filter(map, pos)) { - rb_erase(&pos->rb_node, &kernel_map->dso->functions); + rb_erase(&pos->rb_node, &kernel_map__functions->dso->functions); symbol__delete(pos); } else { - if (map != kernel_map) { + if (map != kernel_map__functions) { rb_erase(&pos->rb_node, - &kernel_map->dso->functions); + &kernel_map__functions->dso->functions); symbols__insert(&map->dso->functions, pos); } count++; @@ -420,8 +420,8 @@ static int kernel_maps__load_kallsyms(symbol_filter_t filter) if (kernel_maps__load_all_kallsyms()) return -1; - symbols__fixup_end(&kernel_map->dso->functions); - kernel_map->dso->origin = DSO__ORIG_KERNEL; + symbols__fixup_end(&kernel_map__functions->dso->functions); + kernel_map__functions->dso->origin = DSO__ORIG_KERNEL; return kernel_maps__split_kallsyms(filter); } @@ -431,7 +431,7 @@ size_t kernel_maps__fprintf(FILE *fp) size_t printed = fprintf(fp, "Kernel maps:\n"); struct rb_node *nd; - for (nd = rb_first(&kernel_maps); nd; nd = rb_next(nd)) { + for (nd = rb_first(&kernel_maps__functions); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); printed += fprintf(fp, "Map:"); @@ -1159,17 +1159,17 @@ out: return ret; } -struct map *kernel_map; +struct map *kernel_map__functions; static void kernel_maps__insert(struct map *map) { - maps__insert(&kernel_maps, map); + maps__insert(&kernel_maps__functions, map); } struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, symbol_filter_t filter) { - struct map *map = maps__find(&kernel_maps, ip); + struct map *map = maps__find(&kernel_maps__functions, ip); if (mapp) *mapp = map; @@ -1178,7 +1178,7 @@ struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, ip = map->map_ip(map, ip); return map__find_function(map, ip, filter); } else - WARN_ONCE(RB_EMPTY_ROOT(&kernel_maps), + WARN_ONCE(RB_EMPTY_ROOT(&kernel_maps__functions), "Empty kernel_maps, was symbol__init() called?\n"); return NULL; @@ -1188,7 +1188,7 @@ struct map *kernel_maps__find_by_dso_name(const char *name) { struct rb_node *nd; - for (nd = rb_first(&kernel_maps); nd; nd = rb_next(nd)) { + for (nd = rb_first(&kernel_maps__functions); nd; nd = rb_next(nd)) { struct map *map = rb_entry(nd, struct map, rb_node); if (map->dso && strcmp(map->dso->name, name) == 0) @@ -1505,11 +1505,11 @@ static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) if (kernel == NULL) return -1; - kernel_map = map__new2(0, kernel); - if (kernel_map == NULL) + kernel_map__functions = map__new2(0, kernel); + if (kernel_map__functions == NULL) goto out_delete_kernel_dso; - kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; + kernel_map__functions->map_ip = kernel_map__functions->unmap_ip = identity__map_ip; kernel->short_name = "[kernel]"; kernel->kernel = 1; @@ -1522,15 +1522,15 @@ static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) sizeof(kernel->build_id)) == 0) kernel->has_build_id = true; - kernel_maps__insert(kernel_map); + kernel_maps__insert(kernel_map__functions); dsos__add(kernel); dsos__add(vdso); return 0; out_delete_kernel_map: - map__delete(kernel_map); - kernel_map = NULL; + map__delete(kernel_map__functions); + kernel_map__functions = NULL; out_delete_kernel_dso: dso__delete(kernel); return -1; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 65846d0..b42d196 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -105,6 +105,6 @@ size_t kernel_maps__fprintf(FILE *fp); int symbol__init(struct symbol_conf *conf); extern struct list_head dsos; -extern struct map *kernel_map; +extern struct map *kernel_map__functions; extern struct dso *vdso; #endif /* __PERF_SYMBOL */ -- cgit v0.10.2 From b0da954a4759ac19fb80a959e53b613fe376bc12 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:14 -0200 Subject: perf symbols: Split the dsos list into kernel and user parts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to look at modules in dsos__findnew because the kernel events come only with user DSOs. Also we need a way to list just the module DSOs so that we can create multiple sets of maps, now that we will support maps for the variables in a symtab. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-3-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4b58656..4805e6d 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -185,11 +185,11 @@ static int do_write(int fd, const void *buf, size_t size) return 0; } -static int dsos__write_buildid_table(int fd) +static int __dsos__write_buildid_table(struct list_head *head, int fd) { struct dso *pos; - list_for_each_entry(pos, &dsos, node) { + list_for_each_entry(pos, head, node) { int err; struct build_id_event b; size_t len; @@ -212,6 +212,14 @@ static int dsos__write_buildid_table(int fd) return 0; } +static int dsos__write_buildid_table(int fd) +{ + int err = __dsos__write_buildid_table(&dsos__kernel, fd); + if (err == 0) + err = __dsos__write_buildid_table(&dsos__user, fd); + return err; +} + static int perf_header__adds_write(struct perf_header *self, int fd) { int nr_sections; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 687fb7f..dc25231 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -28,8 +28,7 @@ enum dso_origin { DSO__ORIG_NOT_FOUND, }; -static void dsos__add(struct dso *dso); -static struct dso *dsos__find(const char *name); +static void dsos__add(struct list_head *head, struct dso *dso); static struct map *map__new2(u64 start, struct dso *dso); static void kernel_maps__insert(struct map *map); static int dso__load_kernel_sym(struct dso *self, struct map *map, @@ -855,7 +854,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, curr_map->unmap_ip = identity__map_ip; curr_dso->origin = DSO__ORIG_KERNEL; kernel_maps__insert(curr_map); - dsos__add(curr_dso); + dsos__add(&dsos__kernel, curr_dso); } else curr_dso = curr_map->dso; @@ -907,12 +906,12 @@ static bool dso__build_id_equal(const struct dso *self, u8 *build_id) return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0; } -bool dsos__read_build_ids(void) +static bool __dsos__read_build_ids(struct list_head *head) { bool have_build_id = false; struct dso *pos; - list_for_each_entry(pos, &dsos, node) + list_for_each_entry(pos, head, node) if (filename__read_build_id(pos->long_name, pos->build_id, sizeof(pos->build_id)) > 0) { have_build_id = true; @@ -922,6 +921,12 @@ bool dsos__read_build_ids(void) return have_build_id; } +bool dsos__read_build_ids(void) +{ + return __dsos__read_build_ids(&dsos__kernel) || + __dsos__read_build_ids(&dsos__user); +} + /* * Align offset to 4 bytes as needed for note name and descriptor data. */ @@ -1343,7 +1348,7 @@ static int kernel_maps__create_module_maps(void) dso->origin = DSO__ORIG_KMODULE; kernel_maps__insert(map); - dsos__add(dso); + dsos__add(&dsos__kernel, dso); } free(line); @@ -1445,19 +1450,20 @@ out_fixup: return err; } -LIST_HEAD(dsos); +LIST_HEAD(dsos__user); +LIST_HEAD(dsos__kernel); struct dso *vdso; -static void dsos__add(struct dso *dso) +static void dsos__add(struct list_head *head, struct dso *dso) { - list_add_tail(&dso->node, &dsos); + list_add_tail(&dso->node, head); } -static struct dso *dsos__find(const char *name) +static struct dso *dsos__find(struct list_head *head, const char *name) { struct dso *pos; - list_for_each_entry(pos, &dsos, node) + list_for_each_entry(pos, head, node) if (strcmp(pos->name, name) == 0) return pos; return NULL; @@ -1465,12 +1471,12 @@ static struct dso *dsos__find(const char *name) struct dso *dsos__findnew(const char *name) { - struct dso *dso = dsos__find(name); + struct dso *dso = dsos__find(&dsos__user, name); if (!dso) { dso = dso__new(name); if (dso != NULL) { - dsos__add(dso); + dsos__add(&dsos__user, dso); dso__set_basename(dso); } } @@ -1478,26 +1484,38 @@ struct dso *dsos__findnew(const char *name) return dso; } -void dsos__fprintf(FILE *fp) +static void __dsos__fprintf(struct list_head *head, FILE *fp) { struct dso *pos; - list_for_each_entry(pos, &dsos, node) + list_for_each_entry(pos, head, node) dso__fprintf(pos, fp); } -size_t dsos__fprintf_buildid(FILE *fp) +void dsos__fprintf(FILE *fp) +{ + __dsos__fprintf(&dsos__kernel, fp); + __dsos__fprintf(&dsos__user, fp); +} + +static size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp) { struct dso *pos; size_t ret = 0; - list_for_each_entry(pos, &dsos, node) { + list_for_each_entry(pos, head, node) { ret += dso__fprintf_buildid(pos, fp); ret += fprintf(fp, " %s\n", pos->long_name); } return ret; } +size_t dsos__fprintf_buildid(FILE *fp) +{ + return (__dsos__fprintf_buildid(&dsos__kernel, fp) + + __dsos__fprintf_buildid(&dsos__user, fp)); +} + static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) { struct dso *kernel = dso__new(conf->vmlinux_name ?: "[kernel.kallsyms]"); @@ -1523,8 +1541,8 @@ static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) kernel->has_build_id = true; kernel_maps__insert(kernel_map__functions); - dsos__add(kernel); - dsos__add(vdso); + dsos__add(&dsos__kernel, kernel); + dsos__add(&dsos__user, vdso); return 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b42d196..5d0371f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -104,7 +104,7 @@ size_t kernel_maps__fprintf(FILE *fp); int symbol__init(struct symbol_conf *conf); -extern struct list_head dsos; +extern struct list_head dsos__user, dsos__kernel; extern struct map *kernel_map__functions; extern struct dso *vdso; #endif /* __PERF_SYMBOL */ -- cgit v0.10.2 From 605ca4ba017455d39ac6991c58eb1e80fb8af48d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:15 -0200 Subject: perf symbols: Unexport kernel_map__functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf annotate was the only user, and it doesn't really need it. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-4-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 377cb7c..0846c8a 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -169,7 +169,6 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) level = '.'; map = thread__find_map(thread, ip); if (map != NULL) { -got_map: ip = map->map_ip(map, ip); sym = map__find_function(map, ip, symbol_filter); } else { @@ -183,10 +182,9 @@ got_map: * the "[vdso]" dso, but for now lets use the old * trick of looking in the whole kernel symbol list. */ - if ((long long)ip < 0) { - map = kernel_map__functions; - goto got_map; - } + if ((long long)ip < 0) + sym = kernel_maps__find_function(ip, &map, + symbol_filter); } dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index dc25231..0b8a298 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -36,6 +36,7 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, unsigned int symbol__priv_size; static int vmlinux_path__nr_entries; static char **vmlinux_path; +static struct map *kernel_map__functions; static struct symbol_conf symbol_conf__defaults = { .use_modules = true, @@ -1164,8 +1165,6 @@ out: return ret; } -struct map *kernel_map__functions; - static void kernel_maps__insert(struct map *map) { maps__insert(&kernel_maps__functions, map); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5d0371f..fb0be9e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -105,6 +105,5 @@ size_t kernel_maps__fprintf(FILE *fp); int symbol__init(struct symbol_conf *conf); extern struct list_head dsos__user, dsos__kernel; -extern struct map *kernel_map__functions; extern struct dso *vdso; #endif /* __PERF_SYMBOL */ -- cgit v0.10.2 From 3610583c29563e23dd038d2870f59c88438bf7a3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:16 -0200 Subject: perf symbols: Add a 'type' field to struct map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That way we will be able to check if the right symtab is loaded in the underlying DSO. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-5-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ded6cf6..a0168f2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -996,7 +996,7 @@ static void event__process_mmap(event_t *self) struct thread *thread = threads__findnew(self->mmap.pid); if (thread != NULL) { - struct map *map = map__new(&self->mmap, NULL, 0); + struct map *map = map__new(&self->mmap, MAP__FUNCTION, NULL, 0); if (map != NULL) thread__insert_map(thread, map); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 882a953..29543bd 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -80,6 +80,10 @@ typedef union event_union { struct sample_event sample; } event_t; +enum map_type { + MAP__FUNCTION, +}; + struct map { union { struct rb_node rb_node; @@ -87,6 +91,7 @@ struct map { }; u64 start; u64 end; + enum map_type type; u64 pgoff; u64 (*map_ip)(struct map *, u64); u64 (*unmap_ip)(struct map *, u64); @@ -112,9 +117,10 @@ struct symbol; typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); -void map__init(struct map *self, u64 start, u64 end, u64 pgoff, - struct dso *dso); -struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); +void map__init(struct map *self, enum map_type type, + u64 start, u64 end, u64 pgoff, struct dso *dso); +struct map *map__new(struct mmap_event *event, enum map_type, + char *cwd, int cwdlen); void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 41c5c4a..52bb4c6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -20,9 +20,10 @@ static int strcommon(const char *pathname, char *cwd, int cwdlen) return n; } -void map__init(struct map *self, u64 start, u64 end, u64 pgoff, - struct dso *dso) +void map__init(struct map *self, enum map_type type, + u64 start, u64 end, u64 pgoff, struct dso *dso) { + self->type = type; self->start = start; self->end = end; self->pgoff = pgoff; @@ -32,7 +33,8 @@ void map__init(struct map *self, u64 start, u64 end, u64 pgoff, RB_CLEAR_NODE(&self->rb_node); } -struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen) +struct map *map__new(struct mmap_event *event, enum map_type type, + char *cwd, int cwdlen) { struct map *self = malloc(sizeof(*self)); @@ -63,7 +65,7 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen) if (dso == NULL) goto out_delete; - map__init(self, event->start, event->start + event->len, + map__init(self, type, event->start, event->start + event->len, event->pgoff, dso); if (self->dso == vdso || anon) @@ -103,7 +105,7 @@ void map__fixup_end(struct map *self, struct rb_root *symbols) struct symbol *map__find_function(struct map *self, u64 ip, symbol_filter_t filter) { - if (!self->dso->loaded) { + if (!dso__loaded(self->dso, self->type)) { int nr = dso__load(self->dso, self, filter); if (nr < 0) { diff --git a/tools/perf/util/process_events.c b/tools/perf/util/process_events.c index a920436..5377868 100644 --- a/tools/perf/util/process_events.c +++ b/tools/perf/util/process_events.c @@ -6,7 +6,7 @@ int cwdlen; int process_mmap_event(event_t *event, unsigned long offset, unsigned long head) { - struct map *map = map__new(&event->mmap, cwd, cwdlen); + struct map *map = map__new(&event->mmap, MAP__FUNCTION, cwd, cwdlen); struct thread *thread = threads__findnew(event->mmap.pid); dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0b8a298..45a4a9a7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -29,7 +29,7 @@ enum dso_origin { }; static void dsos__add(struct list_head *head, struct dso *dso); -static struct map *map__new2(u64 start, struct dso *dso); +static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static void kernel_maps__insert(struct map *map); static int dso__load_kernel_sym(struct dso *self, struct map *map, symbol_filter_t filter); @@ -45,6 +45,16 @@ static struct symbol_conf symbol_conf__defaults = { static struct rb_root kernel_maps__functions; +bool dso__loaded(const struct dso *self, enum map_type type) +{ + return self->loaded & (1 << type); +} + +static void dso__set_loaded(struct dso *self, enum map_type type) +{ + self->loaded |= (1 << type); +} + static void symbols__fixup_end(struct rb_root *self) { struct rb_node *nd, *prevnd = rb_first(self); @@ -387,7 +397,7 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) if (dso == NULL) return -1; - map = map__new2(pos->start, dso); + map = map__new2(pos->start, dso, MAP__FUNCTION); if (map == NULL) { dso__delete(dso); return -1; @@ -846,7 +856,8 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, curr_dso = dso__new(dso_name); if (curr_dso == NULL) goto out_elf_end; - curr_map = map__new2(start, curr_dso); + curr_map = map__new2(start, curr_dso, + MAP__FUNCTION); if (curr_map == NULL) { dso__delete(curr_dso); goto out_elf_end; @@ -1076,7 +1087,7 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) int ret = -1; int fd; - self->loaded = 1; + dso__set_loaded(self, map->type); if (self->kernel) return dso__load_kernel_sym(self, map, filter); @@ -1275,7 +1286,7 @@ static int dsos__set_modules_path(void) * they are loaded) and for vmlinux, where only after we load all the * symbols we'll know where it starts and ends. */ -static struct map *map__new2(u64 start, struct dso *dso) +static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) { struct map *self = malloc(sizeof(*self)); @@ -1283,7 +1294,7 @@ static struct map *map__new2(u64 start, struct dso *dso) /* * ->end will be filled after we load all the symbols */ - map__init(self, start, 0, 0, dso); + map__init(self, type, start, 0, 0, dso); } return self; @@ -1333,7 +1344,7 @@ static int kernel_maps__create_module_maps(void) if (dso == NULL) goto out_delete_line; - map = map__new2(start, dso); + map = map__new2(start, dso, MAP__FUNCTION); if (map == NULL) { dso__delete(dso); goto out_delete_line; @@ -1394,7 +1405,7 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, if (fd < 0) return -1; - self->loaded = 1; + dso__set_loaded(self, map->type); err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0); close(fd); @@ -1522,7 +1533,7 @@ static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) if (kernel == NULL) return -1; - kernel_map__functions = map__new2(0, kernel); + kernel_map__functions = map__new2(0, kernel, MAP__FUNCTION); if (kernel_map__functions == NULL) goto out_delete_kernel_dso; @@ -1533,7 +1544,7 @@ static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) vdso = dso__new("[vdso]"); if (vdso == NULL) goto out_delete_kernel_map; - vdso->loaded = 1; + dso__set_loaded(vdso, MAP__FUNCTION); if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, sizeof(kernel->build_id)) == 0) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fb0be9e..11d4195 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -69,10 +69,10 @@ struct dso { struct symbol *(*find_function)(struct dso *, u64 ip); u8 adjust_symbols:1; u8 slen_calculated:1; - u8 loaded:1; u8 has_build_id:1; u8 kernel:1; unsigned char origin; + u8 loaded; u8 build_id[BUILD_ID_SIZE]; u16 long_name_len; const char *short_name; @@ -85,6 +85,8 @@ void dso__delete(struct dso *self); struct symbol *dso__find_function(struct dso *self, u64 ip); +bool dso__loaded(const struct dso *self, enum map_type type); + struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); void dsos__fprintf(FILE *fp); -- cgit v0.10.2 From 6a4694a433a218c729d336b348a01bfc720da095 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:17 -0200 Subject: perf symbols: Better support for multiple symbol tables per dso MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By using an array of rb_roots in struct dso we can, from a struct map instance to get the right symbol rb_tree more easily. This way we can have just one symbol lookup method for struct map instances, map__find_symbol, instead of one per symtab type (functions, variables). Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-6-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 0846c8a..c32e760 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -170,7 +170,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) map = thread__find_map(thread, ip); if (map != NULL) { ip = map->map_ip(map, ip); - sym = map__find_function(map, ip, symbol_filter); + sym = map__find_symbol(map, ip, symbol_filter); } else { /* * If this is outside of all known maps, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e4b1004..400bef9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -455,7 +455,7 @@ got_map: dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip); *ipp = ip; - return map ? map__find_function(map, ip, NULL) : NULL; + return map ? map__find_symbol(map, ip, NULL) : NULL; } static int call__match(struct symbol *sym) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a0168f2..abe78bb 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -948,7 +948,7 @@ static void event__process_sample(const event_t *self, int counter) map = thread__find_map(thread, ip); if (map != NULL) { ip = map->map_ip(map, ip); - sym = map__find_function(map, ip, symbol_filter); + sym = map__find_symbol(map, ip, symbol_filter); if (sym == NULL) return; userspace_samples++; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 29543bd..3ae3c96 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -81,7 +81,9 @@ typedef union event_union { } event_t; enum map_type { - MAP__FUNCTION, + MAP__FUNCTION = 0, + + MAP__NR_TYPES, }; struct map { @@ -125,10 +127,10 @@ void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); -struct symbol *map__find_function(struct map *self, u64 ip, - symbol_filter_t filter); -void map__fixup_start(struct map *self, struct rb_root *symbols); -void map__fixup_end(struct map *self, struct rb_root *symbols); +struct symbol *map__find_symbol(struct map *self, u64 addr, + symbol_filter_t filter); +void map__fixup_start(struct map *self); +void map__fixup_end(struct map *self); int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); void event__synthesize_threads(int (*process)(event_t *event)); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 52bb4c6..69f94fe 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -82,8 +82,9 @@ void map__delete(struct map *self) free(self); } -void map__fixup_start(struct map *self, struct rb_root *symbols) +void map__fixup_start(struct map *self) { + struct rb_root *symbols = &self->dso->symbols[self->type]; struct rb_node *nd = rb_first(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); @@ -91,8 +92,9 @@ void map__fixup_start(struct map *self, struct rb_root *symbols) } } -void map__fixup_end(struct map *self, struct rb_root *symbols) +void map__fixup_end(struct map *self) { + struct rb_root *symbols = &self->dso->symbols[self->type]; struct rb_node *nd = rb_last(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); @@ -102,8 +104,8 @@ void map__fixup_end(struct map *self, struct rb_root *symbols) #define DSO__DELETED "(deleted)" -struct symbol *map__find_function(struct map *self, u64 ip, - symbol_filter_t filter) +struct symbol *map__find_symbol(struct map *self, u64 addr, + symbol_filter_t filter) { if (!dso__loaded(self->dso, self->type)) { int nr = dso__load(self->dso, self, filter); @@ -138,7 +140,7 @@ struct symbol *map__find_function(struct map *self, u64 ip, } } - return self->dso->find_function(self->dso, ip); + return self->dso->find_symbol(self->dso, self->type, addr); } struct map *map__clone(struct map *self) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 45a4a9a7..9a2dd81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -31,6 +31,7 @@ enum dso_origin { static void dsos__add(struct list_head *head, struct dso *dso); static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static void kernel_maps__insert(struct map *map); +struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr); static int dso__load_kernel_sym(struct dso *self, struct map *map, symbol_filter_t filter); unsigned int symbol__priv_size; @@ -151,11 +152,13 @@ struct dso *dso__new(const char *name) struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); if (self != NULL) { + int i; strcpy(self->name, name); dso__set_long_name(self, self->name); self->short_name = self->name; - self->functions = RB_ROOT; - self->find_function = dso__find_function; + for (i = 0; i < MAP__NR_TYPES; ++i) + self->symbols[i] = RB_ROOT; + self->find_symbol = dso__find_symbol; self->slen_calculated = 0; self->origin = DSO__ORIG_NOT_FOUND; self->loaded = 0; @@ -180,7 +183,9 @@ static void symbols__delete(struct rb_root *self) void dso__delete(struct dso *self) { - symbols__delete(&self->functions); + int i; + for (i = 0; i < MAP__NR_TYPES; ++i) + symbols__delete(&self->symbols[i]); if (self->long_name != self->name) free(self->long_name); free(self); @@ -234,9 +239,9 @@ static struct symbol *symbols__find(struct rb_root *self, u64 ip) return NULL; } -struct symbol *dso__find_function(struct dso *self, u64 ip) +struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr) { - return symbols__find(&self->functions, ip); + return symbols__find(&self->symbols[type], addr); } int build_id__sprintf(u8 *self, int len, char *bf) @@ -262,17 +267,25 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp) return fprintf(fp, "%s", sbuild_id); } +static const char * map_type__name[MAP__NR_TYPES] = { + [MAP__FUNCTION] = "Functions", +}; + size_t dso__fprintf(struct dso *self, FILE *fp) { + int i; struct rb_node *nd; size_t ret = fprintf(fp, "dso: %s (", self->short_name); ret += dso__fprintf_buildid(self, fp); - ret += fprintf(fp, ")\nFunctions:\n"); + ret += fprintf(fp, ")\n"); + for (i = 0; i < MAP__NR_TYPES; ++i) { + ret += fprintf(fp, "%s:\n", map_type__name[i]); - for (nd = rb_first(&self->functions); nd; nd = rb_next(nd)) { - struct symbol *pos = rb_entry(nd, struct symbol, rb_node); - ret += symbol__fprintf(pos, fp); + for (nd = rb_first(&self->symbols[i]); nd; nd = rb_next(nd)) { + struct symbol *pos = rb_entry(nd, struct symbol, rb_node); + ret += symbol__fprintf(pos, fp); + } } return ret; @@ -335,7 +348,7 @@ static int kernel_maps__load_all_kallsyms(void) * kernel_maps__split_kallsyms, when we have split the * maps per module */ - symbols__insert(&kernel_map__functions->dso->functions, sym); + symbols__insert(&kernel_map__functions->dso->symbols[MAP__FUNCTION], sym); } free(line); @@ -359,7 +372,7 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) struct map *map = kernel_map__functions; struct symbol *pos; int count = 0; - struct rb_node *next = rb_first(&kernel_map__functions->dso->functions); + struct rb_node *next = rb_first(&kernel_map__functions->dso->symbols[map->type]); int kernel_range = 0; while (next) { @@ -409,13 +422,13 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) } if (filter && filter(map, pos)) { - rb_erase(&pos->rb_node, &kernel_map__functions->dso->functions); + rb_erase(&pos->rb_node, &kernel_map__functions->dso->symbols[map->type]); symbol__delete(pos); } else { if (map != kernel_map__functions) { rb_erase(&pos->rb_node, - &kernel_map__functions->dso->functions); - symbols__insert(&map->dso->functions, pos); + &kernel_map__functions->dso->symbols[map->type]); + symbols__insert(&map->dso->symbols[map->type], pos); } count++; } @@ -430,7 +443,7 @@ static int kernel_maps__load_kallsyms(symbol_filter_t filter) if (kernel_maps__load_all_kallsyms()) return -1; - symbols__fixup_end(&kernel_map__functions->dso->functions); + symbols__fixup_end(&kernel_map__functions->dso->symbols[MAP__FUNCTION]); kernel_map__functions->dso->origin = DSO__ORIG_KERNEL; return kernel_maps__split_kallsyms(filter); @@ -501,7 +514,7 @@ static int dso__load_perf_map(struct dso *self, struct map *map, if (filter && filter(map, sym)) symbol__delete(sym); else { - symbols__insert(&self->functions, sym); + symbols__insert(&self->symbols[map->type], sym); nr_syms++; } } @@ -699,7 +712,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, struct map *map, if (filter && filter(map, f)) symbol__delete(f); else { - symbols__insert(&self->functions, f); + symbols__insert(&self->symbols[map->type], f); ++nr; } } @@ -721,7 +734,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, struct map *map, if (filter && filter(map, f)) symbol__delete(f); else { - symbols__insert(&self->functions, f); + symbols__insert(&self->symbols[map->type], f); ++nr; } } @@ -896,7 +909,7 @@ new_symbol: if (filter && filter(curr_map, f)) symbol__delete(f); else { - symbols__insert(&curr_dso->functions, f); + symbols__insert(&curr_dso->symbols[curr_map->type], f); nr++; } } @@ -905,7 +918,7 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) - symbols__fixup_end(&self->functions); + symbols__fixup_end(&self->symbols[map->type]); err = nr; out_elf_end: elf_end(elf); @@ -1191,7 +1204,7 @@ struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, if (map) { ip = map->map_ip(map, ip); - return map__find_function(map, ip, filter); + return map__find_symbol(map, ip, filter); } else WARN_ONCE(RB_EMPTY_ROOT(&kernel_maps__functions), "Empty kernel_maps, was symbol__init() called?\n"); @@ -1453,8 +1466,8 @@ do_kallsyms: if (err > 0) { out_fixup: - map__fixup_start(map, &map->dso->functions); - map__fixup_end(map, &map->dso->functions); + map__fixup_start(map); + map__fixup_end(map); } return err; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 11d4195..8934814 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -65,8 +65,9 @@ static inline void *symbol__priv(struct symbol *self) struct dso { struct list_head node; - struct rb_root functions; - struct symbol *(*find_function)(struct dso *, u64 ip); + struct rb_root symbols[MAP__NR_TYPES]; + struct symbol *(*find_symbol)(struct dso *self, + enum map_type type, u64 addr); u8 adjust_symbols:1; u8 slen_calculated:1; u8 has_build_id:1; @@ -83,8 +84,6 @@ struct dso { struct dso *dso__new(const char *name); void dso__delete(struct dso *self); -struct symbol *dso__find_function(struct dso *self, u64 ip); - bool dso__loaded(const struct dso *self, enum map_type type); struct dso *dsos__findnew(const char *name); -- cgit v0.10.2 From 4e06255f5cf2acf6a5abfe7df8c9690463259dea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:18 -0200 Subject: perf symbols: Make the kallsyms loading routines part of the dso class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that the kallsyms loading routines are the direct counterpart of the vmlinux loading ones, i.e. dso__load_kallsyms is the counterpart of dso__load_vmlinux. In the process make them also use the symbols rb tree indexed by map->type, paving the way for supporting other types of symtabs, such as the next one to be supported: variables. This also allowed removal of yet another global variable: kernel_map__functions. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-7-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 9a2dd81..956656f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -37,7 +37,6 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, unsigned int symbol__priv_size; static int vmlinux_path__nr_entries; static char **vmlinux_path; -static struct map *kernel_map__functions; static struct symbol_conf symbol_conf__defaults = { .use_modules = true, @@ -296,10 +295,11 @@ size_t dso__fprintf(struct dso *self, FILE *fp) * so that we can in the next step set the symbol ->end address and then * call kernel_maps__split_kallsyms. */ -static int kernel_maps__load_all_kallsyms(void) +static int dso__load_all_kallsyms(struct dso *self, struct map *map) { char *line = NULL; size_t n; + struct rb_root *root = &self->symbols[map->type]; FILE *file = fopen("/proc/kallsyms", "r"); if (file == NULL) @@ -342,13 +342,11 @@ static int kernel_maps__load_all_kallsyms(void) if (sym == NULL) goto out_delete_line; - /* * We will pass the symbols to the filter later, in - * kernel_maps__split_kallsyms, when we have split the - * maps per module + * map__split_kallsyms, when we have split the maps per module */ - symbols__insert(&kernel_map__functions->dso->symbols[MAP__FUNCTION], sym); + symbols__insert(root, sym); } free(line); @@ -367,12 +365,14 @@ out_failure: * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int kernel_maps__split_kallsyms(symbol_filter_t filter) +static int dso__split_kallsyms(struct dso *self, struct map *map, + symbol_filter_t filter) { - struct map *map = kernel_map__functions; + struct map *curr_map = map; struct symbol *pos; int count = 0; - struct rb_node *next = rb_first(&kernel_map__functions->dso->symbols[map->type]); + struct rb_root *root = &self->symbols[map->type]; + struct rb_node *next = rb_first(root); int kernel_range = 0; while (next) { @@ -385,9 +385,9 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) if (module) { *module++ = '\0'; - if (strcmp(map->dso->name, module)) { - map = kernel_maps__find_by_dso_name(module); - if (!map) { + if (strcmp(self->name, module)) { + curr_map = kernel_maps__find_by_dso_name(module); + if (curr_map == NULL) { pr_err("/proc/{kallsyms,modules} " "inconsistency!\n"); return -1; @@ -397,9 +397,9 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) * So that we look just like we get from .ko files, * i.e. not prelinked, relative to map->start. */ - pos->start = map->map_ip(map, pos->start); - pos->end = map->map_ip(map, pos->end); - } else if (map != kernel_map__functions) { + pos->start = curr_map->map_ip(curr_map, pos->start); + pos->end = curr_map->map_ip(curr_map, pos->end); + } else if (curr_map != map) { char dso_name[PATH_MAX]; struct dso *dso; @@ -410,25 +410,24 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) if (dso == NULL) return -1; - map = map__new2(pos->start, dso, MAP__FUNCTION); + curr_map = map__new2(pos->start, dso, map->type); if (map == NULL) { dso__delete(dso); return -1; } - map->map_ip = map->unmap_ip = identity__map_ip; - kernel_maps__insert(map); + curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; + kernel_maps__insert(curr_map); ++kernel_range; } - if (filter && filter(map, pos)) { - rb_erase(&pos->rb_node, &kernel_map__functions->dso->symbols[map->type]); + if (filter && filter(curr_map, pos)) { + rb_erase(&pos->rb_node, root); symbol__delete(pos); } else { - if (map != kernel_map__functions) { - rb_erase(&pos->rb_node, - &kernel_map__functions->dso->symbols[map->type]); - symbols__insert(&map->dso->symbols[map->type], pos); + if (curr_map != map) { + rb_erase(&pos->rb_node, root); + symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); } count++; } @@ -438,15 +437,16 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) } -static int kernel_maps__load_kallsyms(symbol_filter_t filter) +static int dso__load_kallsyms(struct dso *self, struct map *map, + symbol_filter_t filter) { - if (kernel_maps__load_all_kallsyms()) + if (dso__load_all_kallsyms(self, map) < 0) return -1; - symbols__fixup_end(&kernel_map__functions->dso->symbols[MAP__FUNCTION]); - kernel_map__functions->dso->origin = DSO__ORIG_KERNEL; + symbols__fixup_end(&self->symbols[map->type]); + self->origin = DSO__ORIG_KERNEL; - return kernel_maps__split_kallsyms(filter); + return dso__split_kallsyms(self, map, filter); } size_t kernel_maps__fprintf(FILE *fp) @@ -1457,9 +1457,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, if (err <= 0) { pr_info("The file %s cannot be used, " "trying to use /proc/kallsyms...", self->long_name); - sleep(2); do_kallsyms: - err = kernel_maps__load_kallsyms(filter); + err = dso__load_kallsyms(self, map, filter); if (err > 0 && !is_kallsyms) dso__set_long_name(self, strdup("[kernel.kallsyms]")); } @@ -1541,18 +1540,19 @@ size_t dsos__fprintf_buildid(FILE *fp) static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) { + struct map *kmap; struct dso *kernel = dso__new(conf->vmlinux_name ?: "[kernel.kallsyms]"); if (kernel == NULL) return -1; - kernel_map__functions = map__new2(0, kernel, MAP__FUNCTION); - if (kernel_map__functions == NULL) + kmap = map__new2(0, kernel, MAP__FUNCTION); + if (kmap == NULL) goto out_delete_kernel_dso; - kernel_map__functions->map_ip = kernel_map__functions->unmap_ip = identity__map_ip; - kernel->short_name = "[kernel]"; - kernel->kernel = 1; + kmap->map_ip = kmap->unmap_ip = identity__map_ip; + kernel->short_name = "[kernel]"; + kernel->kernel = 1; vdso = dso__new("[vdso]"); if (vdso == NULL) @@ -1563,15 +1563,14 @@ static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) sizeof(kernel->build_id)) == 0) kernel->has_build_id = true; - kernel_maps__insert(kernel_map__functions); + kernel_maps__insert(kmap); dsos__add(&dsos__kernel, kernel); dsos__add(&dsos__user, vdso); return 0; out_delete_kernel_map: - map__delete(kernel_map__functions); - kernel_map__functions = NULL; + map__delete(kmap); out_delete_kernel_dso: dso__delete(kernel); return -1; -- cgit v0.10.2 From 23ea4a3fadc6b1692dec935397ea15e2affc1cba Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:19 -0200 Subject: perf symbols: Kernel_maps should be an array of MAP__NR_TYPES entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So that we can support multiple symbol table types. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-8-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 956656f..581db4c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -29,6 +29,7 @@ enum dso_origin { }; static void dsos__add(struct list_head *head, struct dso *dso); +static struct map *kernel_maps__find_by_dso_name(const char *name); static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static void kernel_maps__insert(struct map *map); struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr); @@ -43,7 +44,7 @@ static struct symbol_conf symbol_conf__defaults = { .try_vmlinux_path = true, }; -static struct rb_root kernel_maps__functions; +static struct rb_root kernel_maps[MAP__NR_TYPES]; bool dso__loaded(const struct dso *self, enum map_type type) { @@ -78,10 +79,10 @@ static void symbols__fixup_end(struct rb_root *self) curr->end = roundup(curr->start, 4096); } -static void kernel_maps__fixup_end(void) +static void __kernel_maps__fixup_end(struct rb_root *root) { struct map *prev, *curr; - struct rb_node *nd, *prevnd = rb_first(&kernel_maps__functions); + struct rb_node *nd, *prevnd = rb_first(root); if (prevnd == NULL) return; @@ -101,6 +102,13 @@ static void kernel_maps__fixup_end(void) curr->end = ~0UL; } +static void kernel_maps__fixup_end(void) +{ + int i; + for (i = 0; i < MAP__NR_TYPES; ++i) + __kernel_maps__fixup_end(&kernel_maps[i]); +} + static struct symbol *symbol__new(u64 start, u64 len, const char *name) { size_t namelen = strlen(name) + 1; @@ -449,12 +457,12 @@ static int dso__load_kallsyms(struct dso *self, struct map *map, return dso__split_kallsyms(self, map, filter); } -size_t kernel_maps__fprintf(FILE *fp) +static size_t __kernel_maps__fprintf(enum map_type type, FILE *fp) { - size_t printed = fprintf(fp, "Kernel maps:\n"); + size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); struct rb_node *nd; - for (nd = rb_first(&kernel_maps__functions); nd; nd = rb_next(nd)) { + for (nd = rb_first(&kernel_maps[type]); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); printed += fprintf(fp, "Map:"); @@ -465,6 +473,16 @@ size_t kernel_maps__fprintf(FILE *fp) } } + return printed; +} + +size_t kernel_maps__fprintf(FILE *fp) +{ + size_t printed = fprintf(fp, "Kernel maps:\n"); + int i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += __kernel_maps__fprintf(i, fp); + return printed + fprintf(fp, "END kernel maps\n"); } @@ -1191,13 +1209,14 @@ out: static void kernel_maps__insert(struct map *map) { - maps__insert(&kernel_maps__functions, map); + maps__insert(&kernel_maps[map->type], map); } -struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, - symbol_filter_t filter) +static struct symbol *kernel_maps__find_symbol(u64 ip, enum map_type type, + struct map **mapp, + symbol_filter_t filter) { - struct map *map = maps__find(&kernel_maps__functions, ip); + struct map *map = maps__find(&kernel_maps[type], ip); if (mapp) *mapp = map; @@ -1206,17 +1225,23 @@ struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, ip = map->map_ip(map, ip); return map__find_symbol(map, ip, filter); } else - WARN_ONCE(RB_EMPTY_ROOT(&kernel_maps__functions), + WARN_ONCE(RB_EMPTY_ROOT(&kernel_maps[type]), "Empty kernel_maps, was symbol__init() called?\n"); return NULL; } -struct map *kernel_maps__find_by_dso_name(const char *name) +struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, + symbol_filter_t filter) +{ + return kernel_maps__find_symbol(ip, MAP__FUNCTION, mapp, filter); +} + +static struct map *kernel_maps__find_by_dso_name(const char *name) { struct rb_node *nd; - for (nd = rb_first(&kernel_maps__functions); nd; nd = rb_next(nd)) { + for (nd = rb_first(&kernel_maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) { struct map *map = rb_entry(nd, struct map, rb_node); if (map->dso && strcmp(map->dso->name, name) == 0) diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 74cba64..54580bb 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -28,7 +28,6 @@ struct map *maps__find(struct rb_root *maps, u64 ip); struct symbol *kernel_maps__find_function(const u64 ip, struct map **mapp, symbol_filter_t filter); -struct map *kernel_maps__find_by_dso_name(const char *name); static inline struct map *thread__find_map(struct thread *self, u64 ip) { -- cgit v0.10.2 From 95011c600740837288a3b34b411244a4d9157c4e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:20 -0200 Subject: perf symbols: Support multiple symtabs in struct thread MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Making the routines that were so far specific to the kernel maps useful for all threads. This is done by making the kernel maps be contained in a kernel "thread". This gets the kernel specific routines closer to the userspace counterparts, which will help in reducing the boilerplate for resolving a symbol, as will be demonstrated in the next patches. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-9-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index c32e760..3ebd70b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -167,7 +167,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) map ? map->dso->long_name : ""); } else if (event->header.misc & PERF_RECORD_MISC_USER) { level = '.'; - map = thread__find_map(thread, ip); + map = thread__find_map(thread, MAP__FUNCTION, ip); if (map != NULL) { ip = map->map_ip(map, ip); sym = map__find_symbol(map, ip, symbol_filter); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 400bef9..9bd20c2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -422,7 +422,7 @@ resolve_symbol(struct thread *thread, struct map **mapp, u64 *ipp) if (!thread) return NULL; - map = thread__find_map(thread, ip); + map = thread__find_map(thread, MAP__FUNCTION, ip); if (map != NULL) { /* * We have to do this here as we may have a dso diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index abe78bb..bf6730c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -945,7 +945,7 @@ static void event__process_sample(const event_t *self, int counter) if (thread == NULL) return; - map = thread__find_map(thread, ip); + map = thread__find_map(thread, MAP__FUNCTION, ip); if (map != NULL) { ip = map->map_ip(map, ip); sym = map__find_symbol(map, ip, symbol_filter); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 581db4c..b6a2941 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -29,12 +29,11 @@ enum dso_origin { }; static void dsos__add(struct list_head *head, struct dso *dso); -static struct map *kernel_maps__find_by_dso_name(const char *name); +static struct map *thread__find_map_by_name(struct thread *self, char *name); static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); -static void kernel_maps__insert(struct map *map); struct symbol *dso__find_symbol(struct dso *self, enum map_type type, u64 addr); static int dso__load_kernel_sym(struct dso *self, struct map *map, - symbol_filter_t filter); + struct thread *thread, symbol_filter_t filter); unsigned int symbol__priv_size; static int vmlinux_path__nr_entries; static char **vmlinux_path; @@ -44,7 +43,7 @@ static struct symbol_conf symbol_conf__defaults = { .try_vmlinux_path = true, }; -static struct rb_root kernel_maps[MAP__NR_TYPES]; +static struct thread kthread_mem, *kthread = &kthread_mem; bool dso__loaded(const struct dso *self, enum map_type type) { @@ -79,10 +78,10 @@ static void symbols__fixup_end(struct rb_root *self) curr->end = roundup(curr->start, 4096); } -static void __kernel_maps__fixup_end(struct rb_root *root) +static void __thread__fixup_maps_end(struct thread *self, enum map_type type) { struct map *prev, *curr; - struct rb_node *nd, *prevnd = rb_first(root); + struct rb_node *nd, *prevnd = rb_first(&self->maps[type]); if (prevnd == NULL) return; @@ -102,11 +101,11 @@ static void __kernel_maps__fixup_end(struct rb_root *root) curr->end = ~0UL; } -static void kernel_maps__fixup_end(void) +static void thread__fixup_maps_end(struct thread *self) { int i; for (i = 0; i < MAP__NR_TYPES; ++i) - __kernel_maps__fixup_end(&kernel_maps[i]); + __thread__fixup_maps_end(self, i); } static struct symbol *symbol__new(u64 start, u64 len, const char *name) @@ -274,25 +273,16 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp) return fprintf(fp, "%s", sbuild_id); } -static const char * map_type__name[MAP__NR_TYPES] = { - [MAP__FUNCTION] = "Functions", -}; - -size_t dso__fprintf(struct dso *self, FILE *fp) +size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) { - int i; struct rb_node *nd; size_t ret = fprintf(fp, "dso: %s (", self->short_name); ret += dso__fprintf_buildid(self, fp); ret += fprintf(fp, ")\n"); - for (i = 0; i < MAP__NR_TYPES; ++i) { - ret += fprintf(fp, "%s:\n", map_type__name[i]); - - for (nd = rb_first(&self->symbols[i]); nd; nd = rb_next(nd)) { - struct symbol *pos = rb_entry(nd, struct symbol, rb_node); - ret += symbol__fprintf(pos, fp); - } + for (nd = rb_first(&self->symbols[type]); nd; nd = rb_next(nd)) { + struct symbol *pos = rb_entry(nd, struct symbol, rb_node); + ret += symbol__fprintf(pos, fp); } return ret; @@ -373,7 +363,7 @@ out_failure: * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int dso__split_kallsyms(struct dso *self, struct map *map, +static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread *thread, symbol_filter_t filter) { struct map *curr_map = map; @@ -394,10 +384,10 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, *module++ = '\0'; if (strcmp(self->name, module)) { - curr_map = kernel_maps__find_by_dso_name(module); + curr_map = thread__find_map_by_name(thread, module); if (curr_map == NULL) { - pr_err("/proc/{kallsyms,modules} " - "inconsistency!\n"); + pr_debug("/proc/{kallsyms,modules} " + "inconsistency!\n"); return -1; } } @@ -425,7 +415,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, } curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; - kernel_maps__insert(curr_map); + __thread__insert_map(thread, curr_map); ++kernel_range; } @@ -446,7 +436,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, static int dso__load_kallsyms(struct dso *self, struct map *map, - symbol_filter_t filter) + struct thread *thread, symbol_filter_t filter) { if (dso__load_all_kallsyms(self, map) < 0) return -1; @@ -454,35 +444,13 @@ static int dso__load_kallsyms(struct dso *self, struct map *map, symbols__fixup_end(&self->symbols[map->type]); self->origin = DSO__ORIG_KERNEL; - return dso__split_kallsyms(self, map, filter); -} - -static size_t __kernel_maps__fprintf(enum map_type type, FILE *fp) -{ - size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); - struct rb_node *nd; - - for (nd = rb_first(&kernel_maps[type]); nd; nd = rb_next(nd)) { - struct map *pos = rb_entry(nd, struct map, rb_node); - - printed += fprintf(fp, "Map:"); - printed += map__fprintf(pos, fp); - if (verbose > 1) { - printed += dso__fprintf(pos->dso, fp); - printed += fprintf(fp, "--\n"); - } - } - - return printed; + return dso__split_kallsyms(self, map, thread, filter); } size_t kernel_maps__fprintf(FILE *fp) { size_t printed = fprintf(fp, "Kernel maps:\n"); - int i; - for (i = 0; i < MAP__NR_TYPES; ++i) - printed += __kernel_maps__fprintf(i, fp); - + printed += thread__fprintf_maps(kthread, fp); return printed + fprintf(fp, "END kernel maps\n"); } @@ -772,9 +740,9 @@ out: return 0; } -static int dso__load_sym(struct dso *self, struct map *map, const char *name, - int fd, symbol_filter_t filter, int kernel, - int kmodule) +static int dso__load_sym(struct dso *self, struct map *map, + struct thread *thread, const char *name, int fd, + symbol_filter_t filter, int kernel, int kmodule) { struct map *curr_map = map; struct dso *curr_dso = self; @@ -877,7 +845,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, snprintf(dso_name, sizeof(dso_name), "%s%s", self->short_name, section_name); - curr_map = kernel_maps__find_by_dso_name(dso_name); + curr_map = thread__find_map_by_name(thread, dso_name); if (curr_map == NULL) { u64 start = sym.st_value; @@ -896,7 +864,7 @@ static int dso__load_sym(struct dso *self, struct map *map, const char *name, curr_map->map_ip = identity__map_ip; curr_map->unmap_ip = identity__map_ip; curr_dso->origin = DSO__ORIG_KERNEL; - kernel_maps__insert(curr_map); + __thread__insert_map(kthread, curr_map); dsos__add(&dsos__kernel, curr_dso); } else curr_dso = curr_map->dso; @@ -1121,7 +1089,7 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) dso__set_loaded(self, map->type); if (self->kernel) - return dso__load_kernel_sym(self, map, filter); + return dso__load_kernel_sym(self, map, kthread, filter); name = malloc(size); if (!name) @@ -1186,7 +1154,7 @@ compare_build_id: fd = open(name, O_RDONLY); } while (fd < 0); - ret = dso__load_sym(self, map, name, fd, filter, 0, 0); + ret = dso__load_sym(self, map, NULL, name, fd, filter, 0, 0); close(fd); /* @@ -1207,16 +1175,11 @@ out: return ret; } -static void kernel_maps__insert(struct map *map) -{ - maps__insert(&kernel_maps[map->type], map); -} - -static struct symbol *kernel_maps__find_symbol(u64 ip, enum map_type type, - struct map **mapp, - symbol_filter_t filter) +static struct symbol *thread__find_symbol(struct thread *self, u64 ip, + enum map_type type, struct map **mapp, + symbol_filter_t filter) { - struct map *map = maps__find(&kernel_maps[type], ip); + struct map *map = thread__find_map(self, type, ip); if (mapp) *mapp = map; @@ -1224,9 +1187,7 @@ static struct symbol *kernel_maps__find_symbol(u64 ip, enum map_type type, if (map) { ip = map->map_ip(map, ip); return map__find_symbol(map, ip, filter); - } else - WARN_ONCE(RB_EMPTY_ROOT(&kernel_maps[type]), - "Empty kernel_maps, was symbol__init() called?\n"); + } return NULL; } @@ -1234,14 +1195,14 @@ static struct symbol *kernel_maps__find_symbol(u64 ip, enum map_type type, struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, symbol_filter_t filter) { - return kernel_maps__find_symbol(ip, MAP__FUNCTION, mapp, filter); + return thread__find_symbol(kthread, ip, MAP__FUNCTION, mapp, filter); } -static struct map *kernel_maps__find_by_dso_name(const char *name) +static struct map *thread__find_map_by_name(struct thread *self, char *name) { struct rb_node *nd; - for (nd = rb_first(&kernel_maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&self->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) { struct map *map = rb_entry(nd, struct map, rb_node); if (map->dso && strcmp(map->dso->name, name) == 0) @@ -1285,7 +1246,7 @@ static int dsos__set_modules_path_dir(char *dirname) (int)(dot - dent->d_name), dent->d_name); strxfrchar(dso_name, '-', '_'); - map = kernel_maps__find_by_dso_name(dso_name); + map = thread__find_map_by_name(kthread, dso_name); if (map == NULL) continue; @@ -1338,7 +1299,7 @@ static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return self; } -static int kernel_maps__create_module_maps(void) +static int thread__create_module_maps(struct thread *self) { char *line = NULL; size_t n; @@ -1395,7 +1356,7 @@ static int kernel_maps__create_module_maps(void) dso->has_build_id = true; dso->origin = DSO__ORIG_KMODULE; - kernel_maps__insert(map); + __thread__insert_map(self, map); dsos__add(&dsos__kernel, dso); } @@ -1410,7 +1371,7 @@ out_failure: return -1; } -static int dso__load_vmlinux(struct dso *self, struct map *map, +static int dso__load_vmlinux(struct dso *self, struct map *map, struct thread *thread, const char *vmlinux, symbol_filter_t filter) { int err = -1, fd; @@ -1444,15 +1405,14 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return -1; dso__set_loaded(self, map->type); - err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0); - + err = dso__load_sym(self, map, thread, self->long_name, fd, filter, 1, 0); close(fd); return err; } static int dso__load_kernel_sym(struct dso *self, struct map *map, - symbol_filter_t filter) + struct thread *thread, symbol_filter_t filter) { int err; bool is_kallsyms; @@ -1462,8 +1422,8 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, pr_debug("Looking at the vmlinux_path (%d entries long)\n", vmlinux_path__nr_entries); for (i = 0; i < vmlinux_path__nr_entries; ++i) { - err = dso__load_vmlinux(self, map, vmlinux_path[i], - filter); + err = dso__load_vmlinux(self, map, thread, + vmlinux_path[i], filter); if (err > 0) { pr_debug("Using %s for symbols\n", vmlinux_path[i]); @@ -1478,12 +1438,12 @@ static int dso__load_kernel_sym(struct dso *self, struct map *map, if (is_kallsyms) goto do_kallsyms; - err = dso__load_vmlinux(self, map, self->long_name, filter); + err = dso__load_vmlinux(self, map, thread, self->long_name, filter); if (err <= 0) { pr_info("The file %s cannot be used, " "trying to use /proc/kallsyms...", self->long_name); do_kallsyms: - err = dso__load_kallsyms(self, map, filter); + err = dso__load_kallsyms(self, map, thread, filter); if (err > 0 && !is_kallsyms) dso__set_long_name(self, strdup("[kernel.kallsyms]")); } @@ -1535,8 +1495,11 @@ static void __dsos__fprintf(struct list_head *head, FILE *fp) { struct dso *pos; - list_for_each_entry(pos, head, node) - dso__fprintf(pos, fp); + list_for_each_entry(pos, head, node) { + int i; + for (i = 0; i < MAP__NR_TYPES; ++i) + dso__fprintf(pos, i, fp); + } } void dsos__fprintf(FILE *fp) @@ -1563,10 +1526,10 @@ size_t dsos__fprintf_buildid(FILE *fp) __dsos__fprintf_buildid(&dsos__user, fp)); } -static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) +static int thread__create_kernel_map(struct thread *self, const char *vmlinux) { struct map *kmap; - struct dso *kernel = dso__new(conf->vmlinux_name ?: "[kernel.kallsyms]"); + struct dso *kernel = dso__new(vmlinux ?: "[kernel.kallsyms]"); if (kernel == NULL) return -1; @@ -1588,7 +1551,7 @@ static int kernel_maps__create_kernel_map(const struct symbol_conf *conf) sizeof(kernel->build_id)) == 0) kernel->has_build_id = true; - kernel_maps__insert(kmap); + __thread__insert_map(self, kmap); dsos__add(&dsos__kernel, kernel); dsos__add(&dsos__user, vdso); @@ -1656,32 +1619,28 @@ out_fail: return -1; } -static int kernel_maps__init(const struct symbol_conf *conf) +int symbol__init(struct symbol_conf *conf) { const struct symbol_conf *pconf = conf ?: &symbol_conf__defaults; + elf_version(EV_CURRENT); symbol__priv_size = pconf->priv_size; + thread__init(kthread, 0); if (pconf->try_vmlinux_path && vmlinux_path__init() < 0) return -1; - if (kernel_maps__create_kernel_map(pconf) < 0) { + if (thread__create_kernel_map(kthread, pconf->vmlinux_name) < 0) { vmlinux_path__exit(); return -1; } - if (pconf->use_modules && kernel_maps__create_module_maps() < 0) + if (pconf->use_modules && thread__create_module_maps(kthread) < 0) pr_debug("Failed to load list of modules in use, " "continuing...\n"); /* * Now that we have all the maps created, just set the ->end of them: */ - kernel_maps__fixup_end(); + thread__fixup_maps_end(kthread); return 0; } - -int symbol__init(struct symbol_conf *conf) -{ - elf_version(EV_CURRENT); - return kernel_maps__init(conf); -} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 8934814..3f9e4a4 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -92,7 +92,7 @@ void dsos__fprintf(FILE *fp); size_t dsos__fprintf_buildid(FILE *fp); size_t dso__fprintf_buildid(struct dso *self, FILE *fp); -size_t dso__fprintf(struct dso *self, FILE *fp); +size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); char dso__symtab_origin(const struct dso *self); void dso__set_build_id(struct dso *self, void *build_id); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 1796625..2229f82 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -9,17 +9,26 @@ static struct rb_root threads; static struct thread *last_match; +void thread__init(struct thread *self, pid_t pid) +{ + int i; + self->pid = pid; + self->comm = NULL; + for (i = 0; i < MAP__NR_TYPES; ++i) { + self->maps[i] = RB_ROOT; + INIT_LIST_HEAD(&self->removed_maps[i]); + } +} + static struct thread *thread__new(pid_t pid) { struct thread *self = zalloc(sizeof(*self)); if (self != NULL) { - self->pid = pid; + thread__init(self, pid); self->comm = malloc(32); if (self->comm) snprintf(self->comm, 32, ":%d", self->pid); - self->maps = RB_ROOT; - INIT_LIST_HEAD(&self->removed_maps); } return self; @@ -44,24 +53,68 @@ int thread__comm_len(struct thread *self) return self->comm_len; } -static size_t thread__fprintf(struct thread *self, FILE *fp) +static const char *map_type__name[MAP__NR_TYPES] = { + [MAP__FUNCTION] = "Functions", +}; + +static size_t __thread__fprintf_maps(struct thread *self, + enum map_type type, FILE *fp) { + size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); struct rb_node *nd; - struct map *pos; - size_t ret = fprintf(fp, "Thread %d %s\nCurrent maps:\n", - self->pid, self->comm); - for (nd = rb_first(&self->maps); nd; nd = rb_next(nd)) { - pos = rb_entry(nd, struct map, rb_node); - ret += map__fprintf(pos, fp); + for (nd = rb_first(&self->maps[type]); nd; nd = rb_next(nd)) { + struct map *pos = rb_entry(nd, struct map, rb_node); + printed += fprintf(fp, "Map:"); + printed += map__fprintf(pos, fp); + if (verbose > 1) { + printed += dso__fprintf(pos->dso, type, fp); + printed += fprintf(fp, "--\n"); + } } - ret = fprintf(fp, "Removed maps:\n"); + return printed; +} + +size_t thread__fprintf_maps(struct thread *self, FILE *fp) +{ + size_t printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += __thread__fprintf_maps(self, i, fp); + return printed; +} - list_for_each_entry(pos, &self->removed_maps, node) - ret += map__fprintf(pos, fp); +static size_t __thread__fprintf_removed_maps(struct thread *self, + enum map_type type, FILE *fp) +{ + struct map *pos; + size_t printed = 0; + + list_for_each_entry(pos, &self->removed_maps[type], node) { + printed += fprintf(fp, "Map:"); + printed += map__fprintf(pos, fp); + if (verbose > 1) { + printed += dso__fprintf(pos->dso, type, fp); + printed += fprintf(fp, "--\n"); + } + } + return printed; +} - return ret; +static size_t thread__fprintf_removed_maps(struct thread *self, FILE *fp) +{ + size_t printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += __thread__fprintf_removed_maps(self, i, fp); + return printed; +} + +static size_t thread__fprintf(struct thread *self, FILE *fp) +{ + size_t printed = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); + printed += thread__fprintf_removed_maps(self, fp); + printed += fprintf(fp, "Removed maps:\n"); + return printed + thread__fprintf_removed_maps(self, fp); } struct thread *threads__findnew(pid_t pid) @@ -117,7 +170,8 @@ struct thread *register_idle_thread(void) static void thread__remove_overlappings(struct thread *self, struct map *map) { - struct rb_node *next = rb_first(&self->maps); + struct rb_root *root = &self->maps[map->type]; + struct rb_node *next = rb_first(root); while (next) { struct map *pos = rb_entry(next, struct map, rb_node); @@ -132,13 +186,13 @@ static void thread__remove_overlappings(struct thread *self, struct map *map) map__fprintf(pos, stderr); } - rb_erase(&pos->rb_node, &self->maps); + rb_erase(&pos->rb_node, root); /* * We may have references to this map, for instance in some * hist_entry instances, so just move them to a separate * list. */ - list_add_tail(&pos->node, &self->removed_maps); + list_add_tail(&pos->node, &self->removed_maps[map->type]); } } @@ -185,12 +239,26 @@ struct map *maps__find(struct rb_root *maps, u64 ip) void thread__insert_map(struct thread *self, struct map *map) { thread__remove_overlappings(self, map); - maps__insert(&self->maps, map); + maps__insert(&self->maps[map->type], map); } -int thread__fork(struct thread *self, struct thread *parent) +static int thread__clone_maps(struct thread *self, struct thread *parent, + enum map_type type) { struct rb_node *nd; + for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); + struct map *new = map__clone(map); + if (new == NULL) + return -ENOMEM; + thread__insert_map(self, new); + } + return 0; +} + +int thread__fork(struct thread *self, struct thread *parent) +{ + int i; if (self->comm) free(self->comm); @@ -198,14 +266,9 @@ int thread__fork(struct thread *self, struct thread *parent) if (!self->comm) return -ENOMEM; - for (nd = rb_first(&parent->maps); nd; nd = rb_next(nd)) { - struct map *map = rb_entry(nd, struct map, rb_node); - struct map *new = map__clone(map); - if (!new) + for (i = 0; i < MAP__NR_TYPES; ++i) + if (thread__clone_maps(self, parent, i) < 0) return -ENOMEM; - thread__insert_map(self, new); - } - return 0; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 54580bb..3bdd9b2 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -7,20 +7,22 @@ struct thread { struct rb_node rb_node; - struct rb_root maps; - struct list_head removed_maps; + struct rb_root maps[MAP__NR_TYPES]; + struct list_head removed_maps[MAP__NR_TYPES]; pid_t pid; char shortname[3]; char *comm; int comm_len; }; +void thread__init(struct thread *self, pid_t pid); int thread__set_comm(struct thread *self, const char *comm); int thread__comm_len(struct thread *self); struct thread *threads__findnew(pid_t pid); struct thread *register_idle_thread(void); void thread__insert_map(struct thread *self, struct map *map); int thread__fork(struct thread *self, struct thread *parent); +size_t thread__fprintf_maps(struct thread *self, FILE *fp); size_t threads__fprintf(FILE *fp); void maps__insert(struct rb_root *maps, struct map *map); @@ -29,9 +31,14 @@ struct map *maps__find(struct rb_root *maps, u64 ip); struct symbol *kernel_maps__find_function(const u64 ip, struct map **mapp, symbol_filter_t filter); -static inline struct map *thread__find_map(struct thread *self, u64 ip) +static inline struct map *thread__find_map(struct thread *self, + enum map_type type, u64 ip) { - return self ? maps__find(&self->maps, ip) : NULL; + return self ? maps__find(&self->maps[type], ip) : NULL; } +static inline void __thread__insert_map(struct thread *self, struct map *map) +{ + maps__insert(&self->maps[map->type], map); +} #endif /* __PERF_THREAD_H */ -- cgit v0.10.2 From 1de8e24520ffdcf2a90c842eed937f59079a2abd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:21 -0200 Subject: perf symbols: When not using modules, discard its symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-10-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b6a2941..b788c2f 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -381,6 +381,9 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread module = strchr(pos->name, '\t'); if (module) { + if (!thread->use_modules) + goto discard_symbol; + *module++ = '\0'; if (strcmp(self->name, module)) { @@ -420,7 +423,7 @@ static int dso__split_kallsyms(struct dso *self, struct map *map, struct thread } if (filter && filter(curr_map, pos)) { - rb_erase(&pos->rb_node, root); +discard_symbol: rb_erase(&pos->rb_node, root); symbol__delete(pos); } else { if (curr_map != map) { @@ -1635,6 +1638,7 @@ int symbol__init(struct symbol_conf *conf) return -1; } + kthread->use_modules = pconf->use_modules; if (pconf->use_modules && thread__create_module_maps(kthread) < 0) pr_debug("Failed to load list of modules in use, " "continuing...\n"); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 3bdd9b2..59b0d9b 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -10,6 +10,7 @@ struct thread { struct rb_root maps[MAP__NR_TYPES]; struct list_head removed_maps[MAP__NR_TYPES]; pid_t pid; + bool use_modules; char shortname[3]; char *comm; int comm_len; -- cgit v0.10.2 From 62daacb51a2bf8480e6f6b3696b03f102fc15eb0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:22 -0200 Subject: perf tools: Reorganize event processing routines, lotsa dups killed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While implementing event__preprocess_sample, that will do all of the symbol lookup in one convenient function, I noticed that util/process_event.[ch] were not being used at all, then started looking if there were other functions that could be shared and... All those functions really don't need to receive offset + head, the only thing they did was common to all of them, so do it at one place instead. Stats about number of each type of event processed now is done in a central place. Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: John Kacur Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-11-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index de37d49..f1537a9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -369,7 +369,6 @@ LIB_H += util/sort.h LIB_H += util/hist.h LIB_H += util/thread.h LIB_H += util/data_map.h -LIB_H += util/process_events.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -412,7 +411,6 @@ LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o LIB_OBJS += util/data_map.o -LIB_OBJS += util/process_events.o BUILTIN_OBJS += builtin-annotate.o diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 3ebd70b..7d39bd2 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -19,12 +19,12 @@ #include "perf.h" #include "util/debug.h" +#include "util/event.h" #include "util/parse-options.h" #include "util/parse-events.h" #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" -#include "util/process_events.h" static char const *input_name = "perf.data"; @@ -136,8 +136,7 @@ static int hist_entry__add(struct thread *thread, struct map *map, return 0; } -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) +static int process_sample_event(event_t *event) { char level; u64 ip = event->ip.ip; @@ -145,12 +144,8 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) struct symbol *sym = NULL; struct thread *thread = threads__findnew(event->ip.pid); - dump_printf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.misc, - event->ip.pid, - (void *)(long)ip); + dump_printf("(IP, %d): %d: %p\n", event->header.misc, + event->ip.pid, (void *)(long)ip); if (thread == NULL) { fprintf(stderr, "problem processing %d event, skipping it.\n", @@ -198,46 +193,24 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) "skipping event\n"); return -1; } - total++; return 0; } -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) +static int event__process(event_t *self) { - struct thread *thread = threads__findnew(event->comm.pid); - - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); - return -1; - } - total_comm++; - - return 0; -} - -static int -process_event(event_t *event, unsigned long offset, unsigned long head) -{ - switch (event->header.type) { + switch (self->header.type) { case PERF_RECORD_SAMPLE: - return process_sample_event(event, offset, head); + return process_sample_event(self); case PERF_RECORD_MMAP: - return process_mmap_event(event, offset, head); + return event__process_mmap(self); case PERF_RECORD_COMM: - return process_comm_event(event, offset, head); + return event__process_comm(self); case PERF_RECORD_FORK: - return process_task_event(event, offset, head); + return event__process_task(self); /* * We dont process them right now but they are fine: */ @@ -621,15 +594,12 @@ more: (void *)(long)event->header.size, event->header.type); - if (!size || process_event(event, offset, head) < 0) { + if (!size || event__process(event) < 0) { dump_printf("%p [%p]: skipping unknown header type: %d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.type); - - total_unknown++; - /* * assume we lost track of the stream, check alignment, and * increment a single u64 in the hope to catch on again 'soon'. @@ -649,14 +619,11 @@ more: rc = EXIT_SUCCESS; close(input); - dump_printf(" IP events: %10ld\n", total); - dump_printf(" mmap events: %10ld\n", total_mmap); - dump_printf(" comm events: %10ld\n", total_comm); - dump_printf(" fork events: %10ld\n", total_fork); - dump_printf(" unknown events: %10ld\n", total_unknown); - if (dump_trace) + if (dump_trace) { + event__print_totals(); return 0; + } if (verbose > 3) threads__fprintf(stdout); @@ -665,7 +632,7 @@ more: dsos__fprintf(stdout); collapse__resort(); - output__resort(total); + output__resort(event__total[0]); find_annotations(); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 35722fa..e7294c8 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -33,9 +33,6 @@ static bool raw_ip; static char default_sort_order[] = "frag,hit,bytes"; -static char *cwd; -static int cwdlen; - static int *cpunode_map; static int max_cpu_num; @@ -126,25 +123,6 @@ static void setup_cpunode_map(void) } } -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->comm.pid); - - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); - return -1; - } - - return 0; -} - static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, int bytes_req, int bytes_alloc, int cpu) { @@ -340,8 +318,7 @@ process_raw_event(event_t *raw_event __used, void *more_data, } } -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) +static int process_sample_event(event_t *event) { u64 ip = event->ip.ip; u64 timestamp = -1; @@ -366,9 +343,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", event->header.misc, event->ip.pid, event->ip.tid, (void *)(long)ip, @@ -403,7 +378,7 @@ static int sample_type_check(u64 type) static struct perf_file_handler file_handler = { .process_sample_event = process_sample_event, - .process_comm_event = process_comm_event, + .process_comm_event = event__process_comm, .sample_type_check = sample_type_check, }; @@ -413,7 +388,7 @@ static int read_events(void) register_perf_file_handler(&file_handler); return mmap_dispatch_perf_file(&header, input_name, 0, 0, - &cwdlen, &cwd); + &event__cwdlen, &event__cwd); } static double fragmentation(unsigned long n_req, unsigned long n_alloc) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9bd20c2..01ef35c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -30,7 +30,6 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" -#include "util/process_events.h" static char const *input_name = "perf.data"; @@ -655,8 +654,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) return 0; } -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) +static int process_sample_event(event_t *event) { char level; struct symbol *sym = NULL; @@ -673,9 +671,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", event->header.misc, event->ip.pid, event->ip.tid, (void *)(long)ip, @@ -743,47 +739,27 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - total += period; + event__stats.total += period; return 0; } -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) +static int process_comm_event(event_t *event) { struct thread *thread = threads__findnew(event->comm.pid); - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); + dump_printf(": %s:%d\n", event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm_adjust(thread, event->comm.comm)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } - total_comm++; - - return 0; -} - -static int -process_lost_event(event_t *event, unsigned long offset, unsigned long head) -{ - dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->lost.id, - event->lost.lost); - - total_lost += event->lost.lost; return 0; } -static int -process_read_event(event_t *event, unsigned long offset, unsigned long head) +static int process_read_event(event_t *event) { struct perf_event_attr *attr; @@ -799,14 +775,9 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) event->read.value); } - dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->read.pid, - event->read.tid, - attr ? __event_name(attr->type, attr->config) - : "FAIL", - event->read.value); + dump_printf(": %d %d %s %Lu\n", event->read.pid, event->read.tid, + attr ? __event_name(attr->type, attr->config) : "FAIL", + event->read.value); return 0; } @@ -842,11 +813,11 @@ static int sample_type_check(u64 type) static struct perf_file_handler file_handler = { .process_sample_event = process_sample_event, - .process_mmap_event = process_mmap_event, + .process_mmap_event = event__process_mmap, .process_comm_event = process_comm_event, - .process_exit_event = process_task_event, - .process_fork_event = process_task_event, - .process_lost_event = process_lost_event, + .process_exit_event = event__process_task, + .process_fork_event = event__process_task, + .process_lost_event = event__process_lost, .process_read_event = process_read_event, .sample_type_check = sample_type_check, }; @@ -866,19 +837,14 @@ static int __cmd_report(void) register_perf_file_handler(&file_handler); ret = mmap_dispatch_perf_file(&header, input_name, force, - full_paths, &cwdlen, &cwd); + full_paths, &event__cwdlen, &event__cwd); if (ret) return ret; - dump_printf(" IP events: %10ld\n", total); - dump_printf(" mmap events: %10ld\n", total_mmap); - dump_printf(" comm events: %10ld\n", total_comm); - dump_printf(" fork events: %10ld\n", total_fork); - dump_printf(" lost events: %10ld\n", total_lost); - dump_printf(" unknown events: %10ld\n", file_handler.total_unknown); - - if (dump_trace) + if (dump_trace) { + event__print_totals(); return 0; + } if (verbose > 3) threads__fprintf(stdout); @@ -887,8 +853,8 @@ static int __cmd_report(void) dsos__fprintf(stdout); collapse__resort(); - output__resort(total); - output__fprintf(stdout, total); + output__resort(event__stats.total); + output__fprintf(stdout, event__stats.total); if (show_threads) perf_read_values_destroy(&show_threads_values); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 19eb708..26b782f 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -22,8 +22,6 @@ static char const *input_name = "perf.data"; -static unsigned long total_comm = 0; - static struct perf_header *header; static u64 sample_type; @@ -32,9 +30,6 @@ static char *sort_order = default_sort_order; static int profile_cpu = -1; -static char *cwd; -static int cwdlen; - #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 @@ -633,27 +628,6 @@ static void test_calibrations(void) printf("the sleep test took %Ld nsecs\n", T1-T0); } -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->comm.tid); - - dump_printf("%p [%p]: perf_event_comm: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing perf_event_comm, skipping event.\n"); - return -1; - } - total_comm++; - - return 0; -} - - struct raw_event_sample { u32 size; char data[0]; @@ -1622,8 +1596,7 @@ process_raw_event(event_t *raw_event __used, void *more_data, process_sched_migrate_task_event(raw, event, cpu, timestamp, thread); } -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) +static int process_sample_event(event_t *event) { struct thread *thread; u64 ip = event->ip.ip; @@ -1653,9 +1626,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", event->header.misc, event->ip.pid, event->ip.tid, (void *)(long)ip, @@ -1677,10 +1648,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static int -process_lost_event(event_t *event __used, - unsigned long offset __used, - unsigned long head __used) +static int process_lost_event(event_t *event __used) { nr_lost_chunks++; nr_lost_events += event->lost.lost; @@ -1704,7 +1672,7 @@ static int sample_type_check(u64 type) static struct perf_file_handler file_handler = { .process_sample_event = process_sample_event, - .process_comm_event = process_comm_event, + .process_comm_event = event__process_comm, .process_lost_event = process_lost_event, .sample_type_check = sample_type_check, }; @@ -1715,7 +1683,7 @@ static int read_events(void) register_perf_file_handler(&file_handler); return mmap_dispatch_perf_file(&header, input_name, 0, 0, - &cwdlen, &cwd); + &event__cwdlen, &event__cwd); } static void print_bad_events(void) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bf6730c..7a3c0c7 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -991,25 +991,6 @@ static void event__process_sample(const event_t *self, int counter) } } -static void event__process_mmap(event_t *self) -{ - struct thread *thread = threads__findnew(self->mmap.pid); - - if (thread != NULL) { - struct map *map = map__new(&self->mmap, MAP__FUNCTION, NULL, 0); - if (map != NULL) - thread__insert_map(thread, map); - } -} - -static void event__process_comm(event_t *self) -{ - struct thread *thread = threads__findnew(self->comm.pid); - - if (thread != NULL) - thread__set_comm(thread, self->comm.comm); -} - static int event__process(event_t *event) { switch (event->header.type) { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 75972fd..a775025 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -16,38 +16,10 @@ static char const *input_name = "perf.data"; -static unsigned long total = 0; -static unsigned long total_comm = 0; - static struct perf_header *header; static u64 sample_type; -static char *cwd; -static int cwdlen; - - -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->comm.pid); - - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); - return -1; - } - total_comm++; - - return 0; -} - -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) +static int process_sample_event(event_t *event) { u64 ip = event->ip.ip; u64 timestamp = -1; @@ -72,9 +44,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), + dump_printf("(IP, %d): %d/%d: %p period: %Ld\n", event->header.misc, event->ip.pid, event->ip.tid, (void *)(long)ip, @@ -101,7 +71,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) */ print_event(cpu, raw->data, raw->size, timestamp, thread->comm); } - total += period; + event__stats.total += period; return 0; } @@ -122,7 +92,7 @@ static int sample_type_check(u64 type) static struct perf_file_handler file_handler = { .process_sample_event = process_sample_event, - .process_comm_event = process_comm_event, + .process_comm_event = event__process_comm, .sample_type_check = sample_type_check, }; @@ -132,7 +102,7 @@ static int __cmd_trace(void) register_perf_file_handler(&file_handler); return mmap_dispatch_perf_file(&header, input_name, - 0, 0, &cwdlen, &cwd); + 0, 0, &event__cwdlen, &event__cwd); } static const char * const annotate_usage[] = { diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c index b238462..ca0bedf 100644 --- a/tools/perf/util/data_map.c +++ b/tools/perf/util/data_map.c @@ -8,11 +8,9 @@ static struct perf_file_handler *curr_handler; static unsigned long mmap_window = 32; static char __cwd[PATH_MAX]; -static int -process_event_stub(event_t *event __used, - unsigned long offset __used, - unsigned long head __used) +static int process_event_stub(event_t *event __used) { + dump_printf(": unhandled!\n"); return 0; } @@ -40,30 +38,62 @@ void register_perf_file_handler(struct perf_file_handler *handler) curr_handler = handler; } +static const char *event__name[] = { + [0] = "TOTAL", + [PERF_RECORD_MMAP] = "MMAP", + [PERF_RECORD_LOST] = "LOST", + [PERF_RECORD_COMM] = "COMM", + [PERF_RECORD_EXIT] = "EXIT", + [PERF_RECORD_THROTTLE] = "THROTTLE", + [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE", + [PERF_RECORD_FORK] = "FORK", + [PERF_RECORD_READ] = "READ", + [PERF_RECORD_SAMPLE] = "SAMPLE", +}; + +unsigned long event__total[PERF_RECORD_MAX]; + +void event__print_totals(void) +{ + int i; + for (i = 0; i < PERF_RECORD_MAX; ++i) + pr_info("%10s events: %10ld\n", + event__name[i], event__total[i]); +} + static int process_event(event_t *event, unsigned long offset, unsigned long head) { trace_event(event); + if (event->header.type < PERF_RECORD_MAX) { + dump_printf("%p [%p]: PERF_RECORD_%s", + (void *)(offset + head), + (void *)(long)(event->header.size), + event__name[event->header.type]); + ++event__total[0]; + ++event__total[event->header.type]; + } + switch (event->header.type) { case PERF_RECORD_SAMPLE: - return curr_handler->process_sample_event(event, offset, head); + return curr_handler->process_sample_event(event); case PERF_RECORD_MMAP: - return curr_handler->process_mmap_event(event, offset, head); + return curr_handler->process_mmap_event(event); case PERF_RECORD_COMM: - return curr_handler->process_comm_event(event, offset, head); + return curr_handler->process_comm_event(event); case PERF_RECORD_FORK: - return curr_handler->process_fork_event(event, offset, head); + return curr_handler->process_fork_event(event); case PERF_RECORD_EXIT: - return curr_handler->process_exit_event(event, offset, head); + return curr_handler->process_exit_event(event); case PERF_RECORD_LOST: - return curr_handler->process_lost_event(event, offset, head); + return curr_handler->process_lost_event(event); case PERF_RECORD_READ: - return curr_handler->process_read_event(event, offset, head); + return curr_handler->process_read_event(event); case PERF_RECORD_THROTTLE: - return curr_handler->process_throttle_event(event, offset, head); + return curr_handler->process_throttle_event(event); case PERF_RECORD_UNTHROTTLE: - return curr_handler->process_unthrottle_event(event, offset, head); + return curr_handler->process_unthrottle_event(event); default: curr_handler->total_unknown++; return -1; diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h index ae036ec..3180ff7 100644 --- a/tools/perf/util/data_map.h +++ b/tools/perf/util/data_map.h @@ -4,7 +4,7 @@ #include "event.h" #include "header.h" -typedef int (*event_type_handler_t)(event_t *, unsigned long, unsigned long); +typedef int (*event_type_handler_t)(event_t *); struct perf_file_handler { event_type_handler_t process_sample_event; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1dae7e3..70b4aa0 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -2,6 +2,7 @@ #include "event.h" #include "debug.h" #include "string.h" +#include "thread.h" static pid_t event__synthesize_comm(pid_t pid, int full, int (*process)(event_t *event)) @@ -175,3 +176,76 @@ void event__synthesize_threads(int (*process)(event_t *event)) closedir(proc); } + +char *event__cwd; +int event__cwdlen; + +struct events_stats event__stats; + +int event__process_comm(event_t *self) +{ + struct thread *thread = threads__findnew(self->comm.pid); + + dump_printf("PERF_RECORD_COMM: %s:%d\n", + self->comm.comm, self->comm.pid); + + if (thread == NULL || thread__set_comm(thread, self->comm.comm)) { + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); + return -1; + } + + return 0; +} + +int event__process_lost(event_t *self) +{ + dump_printf(": id:%Ld: lost:%Ld\n", self->lost.id, self->lost.lost); + event__stats.lost += self->lost.lost; + return 0; +} + +int event__process_mmap(event_t *self) +{ + struct thread *thread = threads__findnew(self->mmap.pid); + struct map *map = map__new(&self->mmap, MAP__FUNCTION, + event__cwd, event__cwdlen); + + dump_printf(" %d/%d: [%p(%p) @ %p]: %s\n", + self->mmap.pid, self->mmap.tid, + (void *)(long)self->mmap.start, + (void *)(long)self->mmap.len, + (void *)(long)self->mmap.pgoff, + self->mmap.filename); + + if (thread == NULL || map == NULL) + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); + else + thread__insert_map(thread, map); + + return 0; +} + +int event__process_task(event_t *self) +{ + struct thread *thread = threads__findnew(self->fork.pid); + struct thread *parent = threads__findnew(self->fork.ppid); + + dump_printf("(%d:%d):(%d:%d)\n", self->fork.pid, self->fork.tid, + self->fork.ppid, self->fork.ptid); + /* + * A thread clone will have the same PID for both parent and child. + */ + if (thread == parent) + return 0; + + if (self->header.type == PERF_RECORD_EXIT) + return 0; + + if (thread == NULL || parent == NULL || + thread__fork(thread, parent) < 0) { + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); + return -1; + } + + return 0; +} diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 3ae3c96..13c12c7 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -80,6 +80,13 @@ typedef union event_union { struct sample_event sample; } event_t; +struct events_stats { + unsigned long total; + unsigned long lost; +}; + +void event__print_totals(void); + enum map_type { MAP__FUNCTION = 0, @@ -135,4 +142,14 @@ void map__fixup_end(struct map *self); int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); void event__synthesize_threads(int (*process)(event_t *event)); +extern char *event__cwd; +extern int event__cwdlen; +extern struct events_stats event__stats; +extern unsigned long event__total[PERF_RECORD_MAX]; + +int event__process_comm(event_t *self); +int event__process_lost(event_t *self); +int event__process_mmap(event_t *self); +int event__process_task(event_t *self); + #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 7393a02..f26cd9b 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -10,13 +10,6 @@ struct callchain_param callchain_param = { .min_percent = 0.5 }; -unsigned long total; -unsigned long total_mmap; -unsigned long total_comm; -unsigned long total_fork; -unsigned long total_unknown; -unsigned long total_lost; - /* * histogram, sorted on item, collects counts */ diff --git a/tools/perf/util/process_event.c b/tools/perf/util/process_event.c deleted file mode 100644 index a970789..0000000 --- a/tools/perf/util/process_event.c +++ /dev/null @@ -1,53 +0,0 @@ -#include "process_event.h" - -char *cwd; -int cwdlen; - -int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct map *map = map__new(&event->mmap, cwd, cwdlen); - struct thread *thread = threads__findnew(event->mmap.pid); - - dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - event->mmap.tid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); - return 0; - } - - thread__insert_map(thread, map); - total_mmap++; - - return 0; - -} - -int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->comm.pid); - - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm_adjust(thread, event->comm.comm)) { - dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); - return -1; - } - total_comm++; - - return 0; -} - diff --git a/tools/perf/util/process_event.h b/tools/perf/util/process_event.h deleted file mode 100644 index 6f68c69..0000000 --- a/tools/perf/util/process_event.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __PROCESS_EVENT_H -#define __PROCESS_EVENT_H - -#include "../builtin.h" -#include "util.h" - -#include "color.h" -#include -#include "cache.h" -#include -#include "symbol.h" -#include "string.h" - -#include "../perf.h" -#include "debug.h" - -#include "parse-options.h" -#include "parse-events.h" - -#include "thread.h" -#include "sort.h" -#include "hist.h" - -extern char *cwd; -extern int cwdlen; -extern int process_mmap_event(event_t *, unsigned long, unsigned long); -extern int process_comm_event(event_t *, unsigned long , unsigned long); - -#endif /* __PROCESS_H */ diff --git a/tools/perf/util/process_events.c b/tools/perf/util/process_events.c deleted file mode 100644 index 5377868..0000000 --- a/tools/perf/util/process_events.c +++ /dev/null @@ -1,64 +0,0 @@ -#include "process_events.h" - -char *cwd; -int cwdlen; - -int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct map *map = map__new(&event->mmap, MAP__FUNCTION, cwd, cwdlen); - struct thread *thread = threads__findnew(event->mmap.pid); - - dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - event->mmap.tid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); - return 0; - } - - thread__insert_map(thread, map); - total_mmap++; - - return 0; -} - -int -process_task_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->fork.pid); - struct thread *parent = threads__findnew(event->fork.ppid); - - dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type == PERF_RECORD_FORK ? "FORK" : "EXIT", - event->fork.pid, event->fork.tid, - event->fork.ppid, event->fork.ptid); - - /* - * A thread clone will have the same PID for both - * parent and child. - */ - if (thread == parent) - return 0; - - if (event->header.type == PERF_RECORD_EXIT) - return 0; - - if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); - return -1; - } - total_fork++; - - return 0; -} - diff --git a/tools/perf/util/process_events.h b/tools/perf/util/process_events.h deleted file mode 100644 index 73d092f..0000000 --- a/tools/perf/util/process_events.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef __PROCESS_EVENTS_H -#define __PROCESS_EVENTS_H - -#include "../builtin.h" - -#include "util.h" -#include "color.h" -#include -#include "cache.h" -#include -#include "symbol.h" -#include "string.h" -#include "callchain.h" -#include "strlist.h" -#include "values.h" - -#include "../perf.h" -#include "debug.h" -#include "header.h" - -#include "parse-options.h" -#include "parse-events.h" - -#include "data_map.h" -#include "thread.h" -#include "sort.h" -#include "hist.h" - -extern char *cwd; -extern int cwdlen; - -extern int process_mmap_event(event_t *, unsigned long , unsigned long); -extern int process_task_event(event_t *, unsigned long, unsigned long); - -#endif /* __PROCESS_EVENTS_H */ -- cgit v0.10.2 From 1ed091c45ae33b2179d387573c3fe3f3b4adf60a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 27 Nov 2009 16:29:23 -0200 Subject: perf tools: Consolidate symbol resolving across all tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we have a very high level routine for simple tools to process IP sample events: int event__preprocess_sample(const event_t *self, struct addr_location *al, symbol_filter_t filter) It receives the event itself and will insert new threads in the global threads list and resolve the map and symbol, filling all this info into the new addr_location struct, so that tools like annotate and report can further process the event by creating hist_entries in their specific way (with or without callgraphs, etc). It in turn uses the new next layer function: void thread__find_addr_location(struct thread *self, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al, symbol_filter_t filter) This one will, given a thread (userspace or the kernel kthread one), will find the given type (MAP__FUNCTION now, MAP__VARIABLE too in the near future) at the given cpumode, taking vdsos into account (userspace hit, but kernel symbol) and will fill all these details in the addr_location given. Tools that need a more compact API for plain function resolution, like 'kmem', can use this other one: struct symbol *thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter) So, to resolve a kernel symbol, that is all the 'kmem' tool needs, its just a matter of calling: sym = thread__find_function(kthread, addr, NULL); The 'filter' parameter is needed because we do lazy parsing/loading of ELF symtabs or /proc/kallsyms. With this we remove more code duplication all around, which is always good, huh? :-) Signed-off-by: Arnaldo Carvalho de Melo Cc: Frédéric Weisbecker Cc: John Kacur Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <1259346563-12568-12-git-send-email-acme@infradead.org> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7d39bd2..7f85c6e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -124,71 +124,30 @@ static void hist_hit(struct hist_entry *he, u64 ip) h->ip[offset]); } -static int hist_entry__add(struct thread *thread, struct map *map, - struct symbol *sym, u64 ip, u64 count, char level) +static int hist_entry__add(struct addr_location *al, u64 count) { bool hit; - struct hist_entry *he = __hist_entry__add(thread, map, sym, NULL, ip, - count, level, &hit); + struct hist_entry *he = __hist_entry__add(al, NULL, count, &hit); if (he == NULL) return -ENOMEM; - hist_hit(he, ip); + hist_hit(he, al->addr); return 0; } static int process_sample_event(event_t *event) { - char level; - u64 ip = event->ip.ip; - struct map *map = NULL; - struct symbol *sym = NULL; - struct thread *thread = threads__findnew(event->ip.pid); + struct addr_location al; dump_printf("(IP, %d): %d: %p\n", event->header.misc, - event->ip.pid, (void *)(long)ip); + event->ip.pid, (void *)(long)event->ip.ip); - if (thread == NULL) { + if (event__preprocess_sample(event, &al, symbol_filter) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; } - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - - if (event->header.misc & PERF_RECORD_MISC_KERNEL) { - level = 'k'; - sym = kernel_maps__find_function(ip, &map, symbol_filter); - dump_printf(" ...... dso: %s\n", - map ? map->dso->long_name : ""); - } else if (event->header.misc & PERF_RECORD_MISC_USER) { - level = '.'; - map = thread__find_map(thread, MAP__FUNCTION, ip); - if (map != NULL) { - ip = map->map_ip(map, ip); - sym = map__find_symbol(map, ip, symbol_filter); - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall or vdso (which executes in user-mode). - * - * XXX This is nasty, we should have a symbol list in - * the "[vdso]" dso, but for now lets use the old - * trick of looking in the whole kernel symbol list. - */ - if ((long long)ip < 0) - sym = kernel_maps__find_function(ip, &map, - symbol_filter); - } - dump_printf(" ...... dso: %s\n", - map ? map->dso->long_name : ""); - } else { - level = 'H'; - dump_printf(" ...... dso: [hypervisor]\n"); - } - - if (hist_entry__add(thread, map, sym, ip, 1, level)) { + if (hist_entry__add(&al, 1)) { fprintf(stderr, "problem incrementing symbol count, " "skipping event\n"); return -1; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index e7294c8..047fef7 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -420,7 +420,7 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) if (is_caller) { addr = data->call_site; if (!raw_ip) - sym = kernel_maps__find_function(addr, NULL, NULL); + sym = thread__find_function(kthread, addr, NULL); } else addr = data->ptr; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 01ef35c..383c4ab 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -408,55 +408,6 @@ static int thread__set_comm_adjust(struct thread *self, const char *comm) return 0; } - -static struct symbol * -resolve_symbol(struct thread *thread, struct map **mapp, u64 *ipp) -{ - struct map *map = mapp ? *mapp : NULL; - u64 ip = *ipp; - - if (map) - goto got_map; - - if (!thread) - return NULL; - - map = thread__find_map(thread, MAP__FUNCTION, ip); - if (map != NULL) { - /* - * We have to do this here as we may have a dso - * with no symbol hit that has a name longer than - * the ones with symbols sampled. - */ - if (!sort_dso.elide && !map->dso->slen_calculated) - dso__calc_col_width(map->dso); - - if (mapp) - *mapp = map; -got_map: - ip = map->map_ip(map, ip); - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall or vdso (which executes in user-mode). - * - * XXX This is nasty, we should have a symbol list in - * the "[vdso]" dso, but for now lets use the old - * trick of looking in the whole kernel symbol list. - */ - if ((long long)ip < 0) - return kernel_maps__find_function(ip, mapp, NULL); - } - dump_printf(" ...... dso: %s\n", - map ? map->dso->long_name : ""); - dump_printf(" ...... map: %Lx -> %Lx\n", *ipp, ip); - *ipp = ip; - - return map ? map__find_symbol(map, ip, NULL) : NULL; -} - static int call__match(struct symbol *sym) { if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) @@ -469,7 +420,7 @@ static struct symbol **resolve_callchain(struct thread *thread, struct ip_callchain *chain, struct symbol **parent) { - u64 context = PERF_CONTEXT_MAX; + u8 cpumode = PERF_RECORD_MISC_USER; struct symbol **syms = NULL; unsigned int i; @@ -483,30 +434,31 @@ static struct symbol **resolve_callchain(struct thread *thread, for (i = 0; i < chain->nr; i++) { u64 ip = chain->ips[i]; - struct symbol *sym = NULL; + struct addr_location al; if (ip >= PERF_CONTEXT_MAX) { - context = ip; + switch (ip) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; break; + default: + break; + } continue; } - switch (context) { - case PERF_CONTEXT_HV: - break; - case PERF_CONTEXT_KERNEL: - sym = kernel_maps__find_function(ip, NULL, NULL); - break; - default: - sym = resolve_symbol(thread, NULL, &ip); - break; - } - - if (sym) { - if (sort__has_parent && !*parent && call__match(sym)) - *parent = sym; + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + ip, &al, NULL); + if (al.sym != NULL) { + if (sort__has_parent && !*parent && + call__match(al.sym)) + *parent = al.sym; if (!callchain) break; - syms[i] = sym; + syms[i] = al.sym; } } @@ -517,20 +469,17 @@ static struct symbol **resolve_callchain(struct thread *thread, * collect histogram counts */ -static int -hist_entry__add(struct thread *thread, struct map *map, - struct symbol *sym, u64 ip, struct ip_callchain *chain, - char level, u64 count) +static int hist_entry__add(struct addr_location *al, + struct ip_callchain *chain, u64 count) { struct symbol **syms = NULL, *parent = NULL; bool hit; struct hist_entry *he; if ((sort__has_parent || callchain) && chain) - syms = resolve_callchain(thread, chain, &parent); + syms = resolve_callchain(al->thread, chain, &parent); - he = __hist_entry__add(thread, map, sym, parent, - ip, count, level, &hit); + he = __hist_entry__add(al, parent, count, &hit); if (he == NULL) return -ENOMEM; @@ -656,14 +605,12 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) static int process_sample_event(event_t *event) { - char level; - struct symbol *sym = NULL; u64 ip = event->ip.ip; u64 period = 1; - struct map *map = NULL; void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; int cpumode; + struct addr_location al; struct thread *thread = threads__findnew(event->ip.pid); if (sample_type & PERF_SAMPLE_PERIOD) { @@ -709,32 +656,26 @@ static int process_sample_event(event_t *event) cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_RECORD_MISC_KERNEL) { - level = 'k'; - sym = kernel_maps__find_function(ip, &map, NULL); - dump_printf(" ...... dso: %s\n", - map ? map->dso->long_name : ""); - } else if (cpumode == PERF_RECORD_MISC_USER) { - level = '.'; - sym = resolve_symbol(thread, &map, &ip); - - } else { - level = 'H'; - dump_printf(" ...... dso: [hypervisor]\n"); - } + thread__find_addr_location(thread, cpumode, + MAP__FUNCTION, ip, &al, NULL); + /* + * We have to do this here as we may have a dso with no symbol hit that + * has a name longer than the ones with symbols sampled. + */ + if (al.map && !sort_dso.elide && !al.map->dso->slen_calculated) + dso__calc_col_width(al.map->dso); if (dso_list && - (!map || !map->dso || - !(strlist__has_entry(dso_list, map->dso->short_name) || - (map->dso->short_name != map->dso->long_name && - strlist__has_entry(dso_list, map->dso->long_name))))) + (!al.map || !al.map->dso || + !(strlist__has_entry(dso_list, al.map->dso->short_name) || + (al.map->dso->short_name != al.map->dso->long_name && + strlist__has_entry(dso_list, al.map->dso->long_name))))) return 0; - if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) + if (sym_list && al.sym && !strlist__has_entry(sym_list, al.sym->name)) return 0; - if (hist_entry__add(thread, map, sym, ip, - chain, level, period)) { + if (hist_entry__add(&al, chain, period)) { pr_debug("problem incrementing symbol count, skipping event\n"); return -1; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7a3c0c7..e0a374d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -929,55 +929,28 @@ static int symbol_filter(struct map *map, struct symbol *sym) static void event__process_sample(const event_t *self, int counter) { u64 ip = self->ip.ip; - struct map *map; struct sym_entry *syme; - struct symbol *sym; + struct addr_location al; u8 origin = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; switch (origin) { - case PERF_RECORD_MISC_USER: { - struct thread *thread; - + case PERF_RECORD_MISC_USER: if (hide_user_symbols) return; - - thread = threads__findnew(self->ip.pid); - if (thread == NULL) - return; - - map = thread__find_map(thread, MAP__FUNCTION, ip); - if (map != NULL) { - ip = map->map_ip(map, ip); - sym = map__find_symbol(map, ip, symbol_filter); - if (sym == NULL) - return; - userspace_samples++; - break; - } - } - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall or vdso (which executes in user-mode). - */ - if ((long long)ip >= 0) - return; - /* Fall thru */ + break; case PERF_RECORD_MISC_KERNEL: if (hide_kernel_symbols) return; - - sym = kernel_maps__find_function(ip, &map, symbol_filter); - if (sym == NULL) - return; break; default: return; } - syme = symbol__priv(sym); + if (event__preprocess_sample(self, &al, symbol_filter) < 0 || + al.sym == NULL) + return; + syme = symbol__priv(al.sym); if (!syme->skip) { syme->count[counter]++; syme->origin = origin; @@ -986,8 +959,9 @@ static void event__process_sample(const event_t *self, int counter) if (list_empty(&syme->node) || !syme->node.next) __list_insert_active_sym(syme); pthread_mutex_unlock(&active_symbols_lock); + if (origin == PERF_RECORD_MISC_USER) + ++userspace_samples; ++samples; - return; } } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 70b4aa0..233d7ad 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -249,3 +249,65 @@ int event__process_task(event_t *self) return 0; } + +void thread__find_addr_location(struct thread *self, u8 cpumode, + enum map_type type, u64 addr, + struct addr_location *al, + symbol_filter_t filter) +{ + struct thread *thread = al->thread = self; + + al->addr = addr; + + if (cpumode & PERF_RECORD_MISC_KERNEL) { + al->level = 'k'; + thread = kthread; + } else if (cpumode & PERF_RECORD_MISC_USER) + al->level = '.'; + else { + al->level = 'H'; + al->map = NULL; + al->sym = NULL; + return; + } +try_again: + al->map = thread__find_map(thread, type, al->addr); + if (al->map == NULL) { + /* + * If this is outside of all known maps, and is a negative + * address, try to look it up in the kernel dso, as it might be + * a vsyscall or vdso (which executes in user-mode). + * + * XXX This is nasty, we should have a symbol list in the + * "[vdso]" dso, but for now lets use the old trick of looking + * in the whole kernel symbol list. + */ + if ((long long)al->addr < 0 && thread != kthread) { + thread = kthread; + goto try_again; + } + al->sym = NULL; + } else { + al->addr = al->map->map_ip(al->map, al->addr); + al->sym = map__find_symbol(al->map, al->addr, filter); + } +} + +int event__preprocess_sample(const event_t *self, struct addr_location *al, + symbol_filter_t filter) +{ + u8 cpumode = self->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread = threads__findnew(self->ip.pid); + + if (thread == NULL) + return -1; + + dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); + + thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + self->ip.ip, al, filter); + dump_printf(" ...... dso: %s\n", + al->map ? al->map->dso->long_name : + al->level == 'H' ? "[hypervisor]" : ""); + return 0; +} diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 13c12c7..a4cc810 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -152,4 +152,8 @@ int event__process_lost(event_t *self); int event__process_mmap(event_t *self); int event__process_task(event_t *self); +struct addr_location; +int event__preprocess_sample(const event_t *self, struct addr_location *al, + symbol_filter_t filter); + #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f26cd9b..0ebf6ee 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -14,20 +14,19 @@ struct callchain_param callchain_param = { * histogram, sorted on item, collects counts */ -struct hist_entry *__hist_entry__add(struct thread *thread, struct map *map, - struct symbol *sym, +struct hist_entry *__hist_entry__add(struct addr_location *al, struct symbol *sym_parent, - u64 ip, u64 count, char level, bool *hit) + u64 count, bool *hit) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; struct hist_entry *he; struct hist_entry entry = { - .thread = thread, - .map = map, - .sym = sym, - .ip = ip, - .level = level, + .thread = al->thread, + .map = al->map, + .sym = al->sym, + .ip = al->addr, + .level = al->level, .count = count, .parent = sym_parent, }; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ac2149c..3020db0 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -36,9 +36,9 @@ extern unsigned long total_fork; extern unsigned long total_unknown; extern unsigned long total_lost; -struct hist_entry *__hist_entry__add(struct thread *thread, struct map *map, - struct symbol *sym, struct symbol *parent, - u64 ip, u64 count, char level, bool *hit); +struct hist_entry *__hist_entry__add(struct addr_location *al, + struct symbol *parent, + u64 count, bool *hit); extern int64_t hist_entry__cmp(struct hist_entry *, struct hist_entry *); extern int64_t hist_entry__collapse(struct hist_entry *, struct hist_entry *); extern void hist_entry__free(struct hist_entry *); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b788c2f..fffcb93 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -43,7 +43,8 @@ static struct symbol_conf symbol_conf__defaults = { .try_vmlinux_path = true, }; -static struct thread kthread_mem, *kthread = &kthread_mem; +static struct thread kthread_mem; +struct thread *kthread = &kthread_mem; bool dso__loaded(const struct dso *self, enum map_type type) { @@ -1178,29 +1179,6 @@ out: return ret; } -static struct symbol *thread__find_symbol(struct thread *self, u64 ip, - enum map_type type, struct map **mapp, - symbol_filter_t filter) -{ - struct map *map = thread__find_map(self, type, ip); - - if (mapp) - *mapp = map; - - if (map) { - ip = map->map_ip(map, ip); - return map__find_symbol(map, ip, filter); - } - - return NULL; -} - -struct symbol *kernel_maps__find_function(u64 ip, struct map **mapp, - symbol_filter_t filter) -{ - return thread__find_symbol(kthread, ip, MAP__FUNCTION, mapp, filter); -} - static struct map *thread__find_map_by_name(struct thread *self, char *name) { struct rb_node *nd; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 3f9e4a4..17003ef 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -63,6 +63,14 @@ static inline void *symbol__priv(struct symbol *self) return ((void *)self) - symbol__priv_size; } +struct addr_location { + struct thread *thread; + struct map *map; + struct symbol *sym; + u64 addr; + char level; +}; + struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; @@ -105,6 +113,8 @@ size_t kernel_maps__fprintf(FILE *fp); int symbol__init(struct symbol_conf *conf); +struct thread; +struct thread *kthread; extern struct list_head dsos__user, dsos__kernel; extern struct dso *vdso; #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 2229f82..603f561 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -285,3 +285,15 @@ size_t threads__fprintf(FILE *fp) return ret; } + +struct symbol *thread__find_symbol(struct thread *self, + enum map_type type, u64 addr, + symbol_filter_t filter) +{ + struct map *map = thread__find_map(self, type, addr); + + if (map != NULL) + return map__find_symbol(map, map->map_ip(map, addr), filter); + + return NULL; +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 59b0d9b..686d6e9 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -27,19 +27,30 @@ size_t thread__fprintf_maps(struct thread *self, FILE *fp); size_t threads__fprintf(FILE *fp); void maps__insert(struct rb_root *maps, struct map *map); -struct map *maps__find(struct rb_root *maps, u64 ip); - -struct symbol *kernel_maps__find_function(const u64 ip, struct map **mapp, - symbol_filter_t filter); +struct map *maps__find(struct rb_root *maps, u64 addr); static inline struct map *thread__find_map(struct thread *self, - enum map_type type, u64 ip) + enum map_type type, u64 addr) { - return self ? maps__find(&self->maps[type], ip) : NULL; + return self ? maps__find(&self->maps[type], addr) : NULL; } static inline void __thread__insert_map(struct thread *self, struct map *map) { maps__insert(&self->maps[map->type], map); } + +void thread__find_addr_location(struct thread *self, u8 cpumode, + enum map_type type, u64 addr, + struct addr_location *al, + symbol_filter_t filter); +struct symbol *thread__find_symbol(struct thread *self, + enum map_type type, u64 addr, + symbol_filter_t filter); + +static inline struct symbol * +thread__find_function(struct thread *self, u64 addr, symbol_filter_t filter) +{ + return thread__find_symbol(self, MAP__FUNCTION, addr, filter); +} #endif /* __PERF_THREAD_H */ -- cgit v0.10.2 From f151ccf76b38d2ffdfe9e44fa01fe2fc0a754637 Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 28 Oct 2008 21:41:39 +0300 Subject: collie: fix scoop convesion to new api diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index bbf2ebc..b629cba 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -56,6 +56,7 @@ static struct resource collie_scoop_resources[] = { static struct scoop_config collie_scoop_setup = { .io_dir = COLLIE_SCOOP_IO_DIR, .io_out = COLLIE_SCOOP_IO_OUT, + .gpio_base = COLLIE_SCOOP_GPIO_BASE, }; struct platform_device colliescoop_device = { -- cgit v0.10.2 From 1d0ad843b08f7655b8ac011bca1e3e0c69a554de Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Mon, 5 Oct 2009 22:05:38 +0200 Subject: collie: prepare for gpiolib use prefix gpio definitions for direct register access with '_' so we can use the other names for gpio_request & co diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index b629cba..9f5029c 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -249,22 +249,24 @@ static void __init collie_init(void) GPDR = GPIO_LDD8 | GPIO_LDD9 | GPIO_LDD10 | GPIO_LDD11 | GPIO_LDD12 | GPIO_LDD13 | GPIO_LDD14 | GPIO_LDD15 | GPIO_SSP_TXD | GPIO_SSP_SCLK | GPIO_SSP_SFRM | GPIO_SDLC_SCLK | - COLLIE_GPIO_UCB1x00_RESET | COLLIE_GPIO_nMIC_ON | - COLLIE_GPIO_nREMOCON_ON | GPIO_32_768kHz; + _COLLIE_GPIO_UCB1x00_RESET | _COLLIE_GPIO_nMIC_ON | + _COLLIE_GPIO_nREMOCON_ON | GPIO_32_768kHz; PPDR = PPC_LDD0 | PPC_LDD1 | PPC_LDD2 | PPC_LDD3 | PPC_LDD4 | PPC_LDD5 | PPC_LDD6 | PPC_LDD7 | PPC_L_PCLK | PPC_L_LCLK | PPC_L_FCLK | PPC_L_BIAS | PPC_TXD1 | PPC_TXD2 | PPC_TXD3 | PPC_TXD4 | PPC_SCLK | PPC_SFRM; - PWER = COLLIE_GPIO_AC_IN | COLLIE_GPIO_CO | COLLIE_GPIO_ON_KEY | - COLLIE_GPIO_WAKEUP | COLLIE_GPIO_nREMOCON_INT | PWER_RTC; + PWER = _COLLIE_GPIO_AC_IN | _COLLIE_GPIO_CO | _COLLIE_GPIO_ON_KEY | + _COLLIE_GPIO_WAKEUP | _COLLIE_GPIO_nREMOCON_INT | PWER_RTC; - PGSR = COLLIE_GPIO_nREMOCON_ON; + PGSR = _COLLIE_GPIO_nREMOCON_ON; PSDR = PPC_RXD1 | PPC_RXD2 | PPC_RXD3 | PPC_RXD4; PCFR = PCFR_OPDE; + GPSR |= _COLLIE_GPIO_UCB1x00_RESET; + platform_scoop_config = &collie_pcmcia_config; diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h index 9efb569..8c8fe46 100644 --- a/arch/arm/mach-sa1100/include/mach/collie.h +++ b/arch/arm/mach-sa1100/include/mach/collie.h @@ -30,24 +30,34 @@ COLLIE_SCP_LB_VOL_CHG ) #define COLLIE_SCOOP_IO_OUT ( COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R ) -/* GPIOs for which the generic definition doesn't say much */ +/* GPIOs for gpiolib */ -#define COLLIE_GPIO_ON_KEY GPIO_GPIO (0) -#define COLLIE_GPIO_AC_IN GPIO_GPIO (1) -#define COLLIE_GPIO_SDIO_INT GPIO_GPIO (11) -#define COLLIE_GPIO_CF_IRQ GPIO_GPIO (14) -#define COLLIE_GPIO_nREMOCON_INT GPIO_GPIO (15) -#define COLLIE_GPIO_UCB1x00_RESET GPIO_GPIO (16) -#define COLLIE_GPIO_nMIC_ON GPIO_GPIO (17) -#define COLLIE_GPIO_nREMOCON_ON GPIO_GPIO (18) -#define COLLIE_GPIO_CO GPIO_GPIO (20) -#define COLLIE_GPIO_MCP_CLK GPIO_GPIO (21) -#define COLLIE_GPIO_CF_CD GPIO_GPIO (22) -#define COLLIE_GPIO_UCB1x00_IRQ GPIO_GPIO (23) -#define COLLIE_GPIO_WAKEUP GPIO_GPIO (24) -#define COLLIE_GPIO_GA_INT GPIO_GPIO (25) -#define COLLIE_GPIO_MAIN_BAT_LOW GPIO_GPIO (26) +#define COLLIE_GPIO_ON_KEY (0) +#define COLLIE_GPIO_AC_IN (1) +#define COLLIE_GPIO_SDIO_INT (11) +#define COLLIE_GPIO_CF_IRQ (14) +#define COLLIE_GPIO_nREMOCON_INT (15) +#define COLLIE_GPIO_UCB1x00_RESET (16) +#define COLLIE_GPIO_nMIC_ON (17) +#define COLLIE_GPIO_nREMOCON_ON (18) +#define COLLIE_GPIO_CO (20) +#define COLLIE_GPIO_MCP_CLK (21) +#define COLLIE_GPIO_CF_CD (22) +#define COLLIE_GPIO_UCB1x00_IRQ (23) +#define COLLIE_GPIO_WAKEUP (24) +#define COLLIE_GPIO_GA_INT (25) +#define COLLIE_GPIO_MAIN_BAT_LOW (26) +/* GPIO definitions for direct register access */ + +#define _COLLIE_GPIO_ON_KEY GPIO_GPIO(0) +#define _COLLIE_GPIO_AC_IN GPIO_GPIO(1) +#define _COLLIE_GPIO_nREMOCON_INT GPIO_GPIO(15) +#define _COLLIE_GPIO_UCB1x00_RESET GPIO_GPIO(16) +#define _COLLIE_GPIO_nMIC_ON GPIO_GPIO(17) +#define _COLLIE_GPIO_nREMOCON_ON GPIO_GPIO(18) +#define _COLLIE_GPIO_CO GPIO_GPIO(20) +#define _COLLIE_GPIO_WAKEUP GPIO_GPIO(24) /* Interrupts */ #define COLLIE_IRQ_GPIO_ON_KEY IRQ_GPIO0 -- cgit v0.10.2 From c8602edf3f9471466755329b78d309f2a01dd449 Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 14:54:57 +0100 Subject: move drivers/mfd/*.h to include/linux/mfd So drivers like collie_battery driver can use those files easier. diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c index 57271cb..84815f9 100644 --- a/drivers/mfd/mcp-core.c +++ b/drivers/mfd/mcp-core.c @@ -17,11 +17,11 @@ #include #include #include +#include #include #include -#include "mcp.h" #define to_mcp(d) container_of(d, struct mcp, attached_device) #define to_mcp_driver(d) container_of(d, struct mcp_driver, drv) diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c index 62b32da..21218981 100644 --- a/drivers/mfd/mcp-sa11x0.c +++ b/drivers/mfd/mcp-sa11x0.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -28,7 +29,6 @@ #include -#include "mcp.h" struct mcp_sa11x0 { u32 mccr0; diff --git a/drivers/mfd/mcp.h b/drivers/mfd/mcp.h deleted file mode 100644 index c093a93..0000000 --- a/drivers/mfd/mcp.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * linux/drivers/mfd/mcp.h - * - * Copyright (C) 2001 Russell King, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - */ -#ifndef MCP_H -#define MCP_H - -struct mcp_ops; - -struct mcp { - struct module *owner; - struct mcp_ops *ops; - spinlock_t lock; - int use_count; - unsigned int sclk_rate; - unsigned int rw_timeout; - dma_device_t dma_audio_rd; - dma_device_t dma_audio_wr; - dma_device_t dma_telco_rd; - dma_device_t dma_telco_wr; - struct device attached_device; -}; - -struct mcp_ops { - void (*set_telecom_divisor)(struct mcp *, unsigned int); - void (*set_audio_divisor)(struct mcp *, unsigned int); - void (*reg_write)(struct mcp *, unsigned int, unsigned int); - unsigned int (*reg_read)(struct mcp *, unsigned int); - void (*enable)(struct mcp *); - void (*disable)(struct mcp *); -}; - -void mcp_set_telecom_divisor(struct mcp *, unsigned int); -void mcp_set_audio_divisor(struct mcp *, unsigned int); -void mcp_reg_write(struct mcp *, unsigned int, unsigned int); -unsigned int mcp_reg_read(struct mcp *, unsigned int); -void mcp_enable(struct mcp *); -void mcp_disable(struct mcp *); -#define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate) - -struct mcp *mcp_host_alloc(struct device *, size_t); -int mcp_host_register(struct mcp *); -void mcp_host_unregister(struct mcp *); - -struct mcp_driver { - struct device_driver drv; - int (*probe)(struct mcp *); - void (*remove)(struct mcp *); - int (*suspend)(struct mcp *, pm_message_t); - int (*resume)(struct mcp *); -}; - -int mcp_driver_register(struct mcp_driver *); -void mcp_driver_unregister(struct mcp_driver *); - -#define mcp_get_drvdata(mcp) dev_get_drvdata(&(mcp)->attached_device) -#define mcp_set_drvdata(mcp,d) dev_set_drvdata(&(mcp)->attached_device, d) - -#define mcp_priv(mcp) ((void *)((mcp)+1)) - -#endif diff --git a/drivers/mfd/ucb1x00-assabet.c b/drivers/mfd/ucb1x00-assabet.c index 86fed48..cea9da6 100644 --- a/drivers/mfd/ucb1x00-assabet.c +++ b/drivers/mfd/ucb1x00-assabet.c @@ -14,10 +14,10 @@ #include #include #include +#include #include -#include "ucb1x00.h" #define UCB1X00_ATTR(name,input)\ static ssize_t name##_show(struct device *dev, struct device_attribute *attr, \ diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index 60c3988..f9de789 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -25,11 +25,11 @@ #include #include #include +#include #include #include -#include "ucb1x00.h" static DEFINE_MUTEX(ucb1x00_mutex); static LIST_HEAD(ucb1x00_drivers); diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index 61b7d3e..000cb41 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -30,12 +30,12 @@ #include #include #include +#include #include #include #include -#include "ucb1x00.h" struct ucb1x00_ts { diff --git a/drivers/mfd/ucb1x00.h b/drivers/mfd/ucb1x00.h deleted file mode 100644 index a8ad8a0..0000000 --- a/drivers/mfd/ucb1x00.h +++ /dev/null @@ -1,255 +0,0 @@ -/* - * linux/drivers/mfd/ucb1x00.h - * - * Copyright (C) 2001 Russell King, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - */ -#ifndef UCB1200_H -#define UCB1200_H - -#define UCB_IO_DATA 0x00 -#define UCB_IO_DIR 0x01 - -#define UCB_IO_0 (1 << 0) -#define UCB_IO_1 (1 << 1) -#define UCB_IO_2 (1 << 2) -#define UCB_IO_3 (1 << 3) -#define UCB_IO_4 (1 << 4) -#define UCB_IO_5 (1 << 5) -#define UCB_IO_6 (1 << 6) -#define UCB_IO_7 (1 << 7) -#define UCB_IO_8 (1 << 8) -#define UCB_IO_9 (1 << 9) - -#define UCB_IE_RIS 0x02 -#define UCB_IE_FAL 0x03 -#define UCB_IE_STATUS 0x04 -#define UCB_IE_CLEAR 0x04 -#define UCB_IE_ADC (1 << 11) -#define UCB_IE_TSPX (1 << 12) -#define UCB_IE_TSMX (1 << 13) -#define UCB_IE_TCLIP (1 << 14) -#define UCB_IE_ACLIP (1 << 15) - -#define UCB_IRQ_TSPX 12 - -#define UCB_TC_A 0x05 -#define UCB_TC_A_LOOP (1 << 7) /* UCB1200 */ -#define UCB_TC_A_AMPL (1 << 7) /* UCB1300 */ - -#define UCB_TC_B 0x06 -#define UCB_TC_B_VOICE_ENA (1 << 3) -#define UCB_TC_B_CLIP (1 << 4) -#define UCB_TC_B_ATT (1 << 6) -#define UCB_TC_B_SIDE_ENA (1 << 11) -#define UCB_TC_B_MUTE (1 << 13) -#define UCB_TC_B_IN_ENA (1 << 14) -#define UCB_TC_B_OUT_ENA (1 << 15) - -#define UCB_AC_A 0x07 -#define UCB_AC_B 0x08 -#define UCB_AC_B_LOOP (1 << 8) -#define UCB_AC_B_MUTE (1 << 13) -#define UCB_AC_B_IN_ENA (1 << 14) -#define UCB_AC_B_OUT_ENA (1 << 15) - -#define UCB_TS_CR 0x09 -#define UCB_TS_CR_TSMX_POW (1 << 0) -#define UCB_TS_CR_TSPX_POW (1 << 1) -#define UCB_TS_CR_TSMY_POW (1 << 2) -#define UCB_TS_CR_TSPY_POW (1 << 3) -#define UCB_TS_CR_TSMX_GND (1 << 4) -#define UCB_TS_CR_TSPX_GND (1 << 5) -#define UCB_TS_CR_TSMY_GND (1 << 6) -#define UCB_TS_CR_TSPY_GND (1 << 7) -#define UCB_TS_CR_MODE_INT (0 << 8) -#define UCB_TS_CR_MODE_PRES (1 << 8) -#define UCB_TS_CR_MODE_POS (2 << 8) -#define UCB_TS_CR_BIAS_ENA (1 << 11) -#define UCB_TS_CR_TSPX_LOW (1 << 12) -#define UCB_TS_CR_TSMX_LOW (1 << 13) - -#define UCB_ADC_CR 0x0a -#define UCB_ADC_SYNC_ENA (1 << 0) -#define UCB_ADC_VREFBYP_CON (1 << 1) -#define UCB_ADC_INP_TSPX (0 << 2) -#define UCB_ADC_INP_TSMX (1 << 2) -#define UCB_ADC_INP_TSPY (2 << 2) -#define UCB_ADC_INP_TSMY (3 << 2) -#define UCB_ADC_INP_AD0 (4 << 2) -#define UCB_ADC_INP_AD1 (5 << 2) -#define UCB_ADC_INP_AD2 (6 << 2) -#define UCB_ADC_INP_AD3 (7 << 2) -#define UCB_ADC_EXT_REF (1 << 5) -#define UCB_ADC_START (1 << 7) -#define UCB_ADC_ENA (1 << 15) - -#define UCB_ADC_DATA 0x0b -#define UCB_ADC_DAT_VAL (1 << 15) -#define UCB_ADC_DAT(x) (((x) & 0x7fe0) >> 5) - -#define UCB_ID 0x0c -#define UCB_ID_1200 0x1004 -#define UCB_ID_1300 0x1005 -#define UCB_ID_TC35143 0x9712 - -#define UCB_MODE 0x0d -#define UCB_MODE_DYN_VFLAG_ENA (1 << 12) -#define UCB_MODE_AUD_OFF_CAN (1 << 13) - -#include "mcp.h" - -struct ucb1x00_irq { - void *devid; - void (*fn)(int, void *); -}; - -struct ucb1x00 { - spinlock_t lock; - struct mcp *mcp; - unsigned int irq; - struct semaphore adc_sem; - spinlock_t io_lock; - u16 id; - u16 io_dir; - u16 io_out; - u16 adc_cr; - u16 irq_fal_enbl; - u16 irq_ris_enbl; - struct ucb1x00_irq irq_handler[16]; - struct device dev; - struct list_head node; - struct list_head devs; -}; - -struct ucb1x00_driver; - -struct ucb1x00_dev { - struct list_head dev_node; - struct list_head drv_node; - struct ucb1x00 *ucb; - struct ucb1x00_driver *drv; - void *priv; -}; - -struct ucb1x00_driver { - struct list_head node; - struct list_head devs; - int (*add)(struct ucb1x00_dev *dev); - void (*remove)(struct ucb1x00_dev *dev); - int (*suspend)(struct ucb1x00_dev *dev, pm_message_t state); - int (*resume)(struct ucb1x00_dev *dev); -}; - -#define classdev_to_ucb1x00(cd) container_of(cd, struct ucb1x00, dev) - -int ucb1x00_register_driver(struct ucb1x00_driver *); -void ucb1x00_unregister_driver(struct ucb1x00_driver *); - -/** - * ucb1x00_clkrate - return the UCB1x00 SIB clock rate - * @ucb: UCB1x00 structure describing chip - * - * Return the SIB clock rate in Hz. - */ -static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb) -{ - return mcp_get_sclk_rate(ucb->mcp); -} - -/** - * ucb1x00_enable - enable the UCB1x00 SIB clock - * @ucb: UCB1x00 structure describing chip - * - * Enable the SIB clock. This can be called multiple times. - */ -static inline void ucb1x00_enable(struct ucb1x00 *ucb) -{ - mcp_enable(ucb->mcp); -} - -/** - * ucb1x00_disable - disable the UCB1x00 SIB clock - * @ucb: UCB1x00 structure describing chip - * - * Disable the SIB clock. The SIB clock will only be disabled - * when the number of ucb1x00_enable calls match the number of - * ucb1x00_disable calls. - */ -static inline void ucb1x00_disable(struct ucb1x00 *ucb) -{ - mcp_disable(ucb->mcp); -} - -/** - * ucb1x00_reg_write - write a UCB1x00 register - * @ucb: UCB1x00 structure describing chip - * @reg: UCB1x00 4-bit register index to write - * @val: UCB1x00 16-bit value to write - * - * Write the UCB1x00 register @reg with value @val. The SIB - * clock must be running for this function to return. - */ -static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val) -{ - mcp_reg_write(ucb->mcp, reg, val); -} - -/** - * ucb1x00_reg_read - read a UCB1x00 register - * @ucb: UCB1x00 structure describing chip - * @reg: UCB1x00 4-bit register index to write - * - * Read the UCB1x00 register @reg and return its value. The SIB - * clock must be running for this function to return. - */ -static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg) -{ - return mcp_reg_read(ucb->mcp, reg); -} -/** - * ucb1x00_set_audio_divisor - - * @ucb: UCB1x00 structure describing chip - * @div: SIB clock divisor - */ -static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div) -{ - mcp_set_audio_divisor(ucb->mcp, div); -} - -/** - * ucb1x00_set_telecom_divisor - - * @ucb: UCB1x00 structure describing chip - * @div: SIB clock divisor - */ -static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div) -{ - mcp_set_telecom_divisor(ucb->mcp, div); -} - -void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int); -void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int); -unsigned int ucb1x00_io_read(struct ucb1x00 *ucb); - -#define UCB_NOSYNC (0) -#define UCB_SYNC (1) - -unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync); -void ucb1x00_adc_enable(struct ucb1x00 *ucb); -void ucb1x00_adc_disable(struct ucb1x00 *ucb); - -/* - * Which edges of the IRQ do you want to control today? - */ -#define UCB_RISING (1 << 0) -#define UCB_FALLING (1 << 1) - -int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid); -void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); -void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); -int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid); - -#endif diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h new file mode 100644 index 0000000..be95e09 --- /dev/null +++ b/include/linux/mfd/mcp.h @@ -0,0 +1,68 @@ +/* + * linux/drivers/mfd/mcp.h + * + * Copyright (C) 2001 Russell King, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + */ +#ifndef MCP_H +#define MCP_H + +#include + +struct mcp_ops; + +struct mcp { + struct module *owner; + struct mcp_ops *ops; + spinlock_t lock; + int use_count; + unsigned int sclk_rate; + unsigned int rw_timeout; + dma_device_t dma_audio_rd; + dma_device_t dma_audio_wr; + dma_device_t dma_telco_rd; + dma_device_t dma_telco_wr; + struct device attached_device; +}; + +struct mcp_ops { + void (*set_telecom_divisor)(struct mcp *, unsigned int); + void (*set_audio_divisor)(struct mcp *, unsigned int); + void (*reg_write)(struct mcp *, unsigned int, unsigned int); + unsigned int (*reg_read)(struct mcp *, unsigned int); + void (*enable)(struct mcp *); + void (*disable)(struct mcp *); +}; + +void mcp_set_telecom_divisor(struct mcp *, unsigned int); +void mcp_set_audio_divisor(struct mcp *, unsigned int); +void mcp_reg_write(struct mcp *, unsigned int, unsigned int); +unsigned int mcp_reg_read(struct mcp *, unsigned int); +void mcp_enable(struct mcp *); +void mcp_disable(struct mcp *); +#define mcp_get_sclk_rate(mcp) ((mcp)->sclk_rate) + +struct mcp *mcp_host_alloc(struct device *, size_t); +int mcp_host_register(struct mcp *); +void mcp_host_unregister(struct mcp *); + +struct mcp_driver { + struct device_driver drv; + int (*probe)(struct mcp *); + void (*remove)(struct mcp *); + int (*suspend)(struct mcp *, pm_message_t); + int (*resume)(struct mcp *); +}; + +int mcp_driver_register(struct mcp_driver *); +void mcp_driver_unregister(struct mcp_driver *); + +#define mcp_get_drvdata(mcp) dev_get_drvdata(&(mcp)->attached_device) +#define mcp_set_drvdata(mcp,d) dev_set_drvdata(&(mcp)->attached_device, d) + +#define mcp_priv(mcp) ((void *)((mcp)+1)) + +#endif diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h new file mode 100644 index 0000000..eac3463 --- /dev/null +++ b/include/linux/mfd/ucb1x00.h @@ -0,0 +1,255 @@ +/* + * linux/include/mfd/ucb1x00.h + * + * Copyright (C) 2001 Russell King, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + */ +#ifndef UCB1200_H +#define UCB1200_H + +#include +#define UCB_IO_DATA 0x00 +#define UCB_IO_DIR 0x01 + +#define UCB_IO_0 (1 << 0) +#define UCB_IO_1 (1 << 1) +#define UCB_IO_2 (1 << 2) +#define UCB_IO_3 (1 << 3) +#define UCB_IO_4 (1 << 4) +#define UCB_IO_5 (1 << 5) +#define UCB_IO_6 (1 << 6) +#define UCB_IO_7 (1 << 7) +#define UCB_IO_8 (1 << 8) +#define UCB_IO_9 (1 << 9) + +#define UCB_IE_RIS 0x02 +#define UCB_IE_FAL 0x03 +#define UCB_IE_STATUS 0x04 +#define UCB_IE_CLEAR 0x04 +#define UCB_IE_ADC (1 << 11) +#define UCB_IE_TSPX (1 << 12) +#define UCB_IE_TSMX (1 << 13) +#define UCB_IE_TCLIP (1 << 14) +#define UCB_IE_ACLIP (1 << 15) + +#define UCB_IRQ_TSPX 12 + +#define UCB_TC_A 0x05 +#define UCB_TC_A_LOOP (1 << 7) /* UCB1200 */ +#define UCB_TC_A_AMPL (1 << 7) /* UCB1300 */ + +#define UCB_TC_B 0x06 +#define UCB_TC_B_VOICE_ENA (1 << 3) +#define UCB_TC_B_CLIP (1 << 4) +#define UCB_TC_B_ATT (1 << 6) +#define UCB_TC_B_SIDE_ENA (1 << 11) +#define UCB_TC_B_MUTE (1 << 13) +#define UCB_TC_B_IN_ENA (1 << 14) +#define UCB_TC_B_OUT_ENA (1 << 15) + +#define UCB_AC_A 0x07 +#define UCB_AC_B 0x08 +#define UCB_AC_B_LOOP (1 << 8) +#define UCB_AC_B_MUTE (1 << 13) +#define UCB_AC_B_IN_ENA (1 << 14) +#define UCB_AC_B_OUT_ENA (1 << 15) + +#define UCB_TS_CR 0x09 +#define UCB_TS_CR_TSMX_POW (1 << 0) +#define UCB_TS_CR_TSPX_POW (1 << 1) +#define UCB_TS_CR_TSMY_POW (1 << 2) +#define UCB_TS_CR_TSPY_POW (1 << 3) +#define UCB_TS_CR_TSMX_GND (1 << 4) +#define UCB_TS_CR_TSPX_GND (1 << 5) +#define UCB_TS_CR_TSMY_GND (1 << 6) +#define UCB_TS_CR_TSPY_GND (1 << 7) +#define UCB_TS_CR_MODE_INT (0 << 8) +#define UCB_TS_CR_MODE_PRES (1 << 8) +#define UCB_TS_CR_MODE_POS (2 << 8) +#define UCB_TS_CR_BIAS_ENA (1 << 11) +#define UCB_TS_CR_TSPX_LOW (1 << 12) +#define UCB_TS_CR_TSMX_LOW (1 << 13) + +#define UCB_ADC_CR 0x0a +#define UCB_ADC_SYNC_ENA (1 << 0) +#define UCB_ADC_VREFBYP_CON (1 << 1) +#define UCB_ADC_INP_TSPX (0 << 2) +#define UCB_ADC_INP_TSMX (1 << 2) +#define UCB_ADC_INP_TSPY (2 << 2) +#define UCB_ADC_INP_TSMY (3 << 2) +#define UCB_ADC_INP_AD0 (4 << 2) +#define UCB_ADC_INP_AD1 (5 << 2) +#define UCB_ADC_INP_AD2 (6 << 2) +#define UCB_ADC_INP_AD3 (7 << 2) +#define UCB_ADC_EXT_REF (1 << 5) +#define UCB_ADC_START (1 << 7) +#define UCB_ADC_ENA (1 << 15) + +#define UCB_ADC_DATA 0x0b +#define UCB_ADC_DAT_VAL (1 << 15) +#define UCB_ADC_DAT(x) (((x) & 0x7fe0) >> 5) + +#define UCB_ID 0x0c +#define UCB_ID_1200 0x1004 +#define UCB_ID_1300 0x1005 +#define UCB_ID_TC35143 0x9712 + +#define UCB_MODE 0x0d +#define UCB_MODE_DYN_VFLAG_ENA (1 << 12) +#define UCB_MODE_AUD_OFF_CAN (1 << 13) + + +struct ucb1x00_irq { + void *devid; + void (*fn)(int, void *); +}; + +struct ucb1x00 { + spinlock_t lock; + struct mcp *mcp; + unsigned int irq; + struct semaphore adc_sem; + spinlock_t io_lock; + u16 id; + u16 io_dir; + u16 io_out; + u16 adc_cr; + u16 irq_fal_enbl; + u16 irq_ris_enbl; + struct ucb1x00_irq irq_handler[16]; + struct device dev; + struct list_head node; + struct list_head devs; +}; + +struct ucb1x00_driver; + +struct ucb1x00_dev { + struct list_head dev_node; + struct list_head drv_node; + struct ucb1x00 *ucb; + struct ucb1x00_driver *drv; + void *priv; +}; + +struct ucb1x00_driver { + struct list_head node; + struct list_head devs; + int (*add)(struct ucb1x00_dev *dev); + void (*remove)(struct ucb1x00_dev *dev); + int (*suspend)(struct ucb1x00_dev *dev, pm_message_t state); + int (*resume)(struct ucb1x00_dev *dev); +}; + +#define classdev_to_ucb1x00(cd) container_of(cd, struct ucb1x00, dev) + +int ucb1x00_register_driver(struct ucb1x00_driver *); +void ucb1x00_unregister_driver(struct ucb1x00_driver *); + +/** + * ucb1x00_clkrate - return the UCB1x00 SIB clock rate + * @ucb: UCB1x00 structure describing chip + * + * Return the SIB clock rate in Hz. + */ +static inline unsigned int ucb1x00_clkrate(struct ucb1x00 *ucb) +{ + return mcp_get_sclk_rate(ucb->mcp); +} + +/** + * ucb1x00_enable - enable the UCB1x00 SIB clock + * @ucb: UCB1x00 structure describing chip + * + * Enable the SIB clock. This can be called multiple times. + */ +static inline void ucb1x00_enable(struct ucb1x00 *ucb) +{ + mcp_enable(ucb->mcp); +} + +/** + * ucb1x00_disable - disable the UCB1x00 SIB clock + * @ucb: UCB1x00 structure describing chip + * + * Disable the SIB clock. The SIB clock will only be disabled + * when the number of ucb1x00_enable calls match the number of + * ucb1x00_disable calls. + */ +static inline void ucb1x00_disable(struct ucb1x00 *ucb) +{ + mcp_disable(ucb->mcp); +} + +/** + * ucb1x00_reg_write - write a UCB1x00 register + * @ucb: UCB1x00 structure describing chip + * @reg: UCB1x00 4-bit register index to write + * @val: UCB1x00 16-bit value to write + * + * Write the UCB1x00 register @reg with value @val. The SIB + * clock must be running for this function to return. + */ +static inline void ucb1x00_reg_write(struct ucb1x00 *ucb, unsigned int reg, unsigned int val) +{ + mcp_reg_write(ucb->mcp, reg, val); +} + +/** + * ucb1x00_reg_read - read a UCB1x00 register + * @ucb: UCB1x00 structure describing chip + * @reg: UCB1x00 4-bit register index to write + * + * Read the UCB1x00 register @reg and return its value. The SIB + * clock must be running for this function to return. + */ +static inline unsigned int ucb1x00_reg_read(struct ucb1x00 *ucb, unsigned int reg) +{ + return mcp_reg_read(ucb->mcp, reg); +} +/** + * ucb1x00_set_audio_divisor - + * @ucb: UCB1x00 structure describing chip + * @div: SIB clock divisor + */ +static inline void ucb1x00_set_audio_divisor(struct ucb1x00 *ucb, unsigned int div) +{ + mcp_set_audio_divisor(ucb->mcp, div); +} + +/** + * ucb1x00_set_telecom_divisor - + * @ucb: UCB1x00 structure describing chip + * @div: SIB clock divisor + */ +static inline void ucb1x00_set_telecom_divisor(struct ucb1x00 *ucb, unsigned int div) +{ + mcp_set_telecom_divisor(ucb->mcp, div); +} + +void ucb1x00_io_set_dir(struct ucb1x00 *ucb, unsigned int, unsigned int); +void ucb1x00_io_write(struct ucb1x00 *ucb, unsigned int, unsigned int); +unsigned int ucb1x00_io_read(struct ucb1x00 *ucb); + +#define UCB_NOSYNC (0) +#define UCB_SYNC (1) + +unsigned int ucb1x00_adc_read(struct ucb1x00 *ucb, int adc_channel, int sync); +void ucb1x00_adc_enable(struct ucb1x00 *ucb); +void ucb1x00_adc_disable(struct ucb1x00 *ucb); + +/* + * Which edges of the IRQ do you want to control today? + */ +#define UCB_RISING (1 << 0) +#define UCB_FALLING (1 << 1) + +int ucb1x00_hook_irq(struct ucb1x00 *ucb, unsigned int idx, void (*fn)(int, void *), void *devid); +void ucb1x00_enable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); +void ucb1x00_disable_irq(struct ucb1x00 *ucb, unsigned int idx, int edges); +int ucb1x00_free_irq(struct ucb1x00 *ucb, unsigned int idx, void *devid); + +#endif -- cgit v0.10.2 From 167c55ef80d26679b8b4b4ffba9da208a7c1875d Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 13:21:42 +0100 Subject: collie: locomo-led change default trigger Collie uses now the powersupply framework. Change the default led-trigger of locomo-led to reflect that. diff --git a/drivers/leds/leds-locomo.c b/drivers/leds/leds-locomo.c index 5d91362..1f7c10f 100644 --- a/drivers/leds/leds-locomo.c +++ b/drivers/leds/leds-locomo.c @@ -44,7 +44,7 @@ static void locomoled_brightness_set1(struct led_classdev *led_cdev, static struct led_classdev locomo_led0 = { .name = "locomo:amber:charge", - .default_trigger = "sharpsl-charge", + .default_trigger = "main-battery-charging", .brightness_set = locomoled_brightness_set0, }; -- cgit v0.10.2 From cc647172795713e013f8de4bcdf91860e9e87bff Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Mon, 9 Feb 2009 23:14:44 +0100 Subject: SA1100: make gpio_to_irq and reverse a macro The function can't be used for static initialisations so convert them to macros. diff --git a/arch/arm/mach-sa1100/include/mach/gpio.h b/arch/arm/mach-sa1100/include/mach/gpio.h index 582a0c9..7befc10 100644 --- a/arch/arm/mach-sa1100/include/mach/gpio.h +++ b/arch/arm/mach-sa1100/include/mach/gpio.h @@ -49,20 +49,9 @@ static inline void gpio_set_value(unsigned gpio, int value) #define gpio_cansleep __gpio_cansleep -static inline unsigned gpio_to_irq(unsigned gpio) -{ - if (gpio < 11) - return IRQ_GPIO0 + gpio; - else - return IRQ_GPIO11 - 11 + gpio; -} - -static inline unsigned irq_to_gpio(unsigned irq) -{ - if (irq < IRQ_GPIO11_27) - return irq - IRQ_GPIO0; - else - return irq - IRQ_GPIO11 + 11; -} +#define gpio_to_irq(gpio) ((gpio < 11) ? (IRQ_GPIO0 + gpio) : \ + (IRQ_GPIO11 - 11 + gpio)) +#define irq_to_gpio(irq) ((irq < IRQ_GPIO11_27) ? (irq - IRQ_GPIO0) : \ + (irq - IRQ_GPIO11 + 11)) #endif -- cgit v0.10.2 From 9ca3dc805cd0d89c44f88b9a399061946781323a Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 14:50:56 +0100 Subject: add gpiolib support to ucb1x00 The old access methods to the gpios will be removed when all users has been converted. (mainly ucb1x00-ts) diff --git a/arch/arm/mach-sa1100/include/mach/mcp.h b/arch/arm/mach-sa1100/include/mach/mcp.h index fb8b09a..ed1a331 100644 --- a/arch/arm/mach-sa1100/include/mach/mcp.h +++ b/arch/arm/mach-sa1100/include/mach/mcp.h @@ -16,6 +16,7 @@ struct mcp_plat_data { u32 mccr0; u32 mccr1; unsigned int sclk_rate; + int gpio_base; }; #endif diff --git a/drivers/mfd/mcp-sa11x0.c b/drivers/mfd/mcp-sa11x0.c index 21218981..2584272 100644 --- a/drivers/mfd/mcp-sa11x0.c +++ b/drivers/mfd/mcp-sa11x0.c @@ -163,6 +163,7 @@ static int mcp_sa11x0_probe(struct platform_device *pdev) mcp->dma_audio_wr = DMA_Ser4MCP0Wr; mcp->dma_telco_rd = DMA_Ser4MCP1Rd; mcp->dma_telco_wr = DMA_Ser4MCP1Wr; + mcp->gpio_base = data->gpio_base; platform_set_drvdata(pdev, mcp); diff --git a/drivers/mfd/ucb1x00-core.c b/drivers/mfd/ucb1x00-core.c index f9de789..252b741 100644 --- a/drivers/mfd/ucb1x00-core.c +++ b/drivers/mfd/ucb1x00-core.c @@ -26,11 +26,11 @@ #include #include #include +#include #include #include - static DEFINE_MUTEX(ucb1x00_mutex); static LIST_HEAD(ucb1x00_drivers); static LIST_HEAD(ucb1x00_devices); @@ -108,6 +108,60 @@ unsigned int ucb1x00_io_read(struct ucb1x00 *ucb) return ucb1x00_reg_read(ucb, UCB_IO_DATA); } +static void ucb1x00_gpio_set(struct gpio_chip *chip, unsigned offset, int value) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + if (value) + ucb->io_out |= 1 << offset; + else + ucb->io_out &= ~(1 << offset); + + ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out); + spin_unlock_irqrestore(&ucb->io_lock, flags); +} + +static int ucb1x00_gpio_get(struct gpio_chip *chip, unsigned offset) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + return ucb1x00_reg_read(ucb, UCB_IO_DATA) & (1 << offset); +} + +static int ucb1x00_gpio_direction_input(struct gpio_chip *chip, unsigned offset) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + ucb->io_dir &= ~(1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); + spin_unlock_irqrestore(&ucb->io_lock, flags); + + return 0; +} + +static int ucb1x00_gpio_direction_output(struct gpio_chip *chip, unsigned offset + , int value) +{ + struct ucb1x00 *ucb = container_of(chip, struct ucb1x00, gpio); + unsigned long flags; + + spin_lock_irqsave(&ucb->io_lock, flags); + ucb->io_dir |= (1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); + + if (value) + ucb->io_out |= 1 << offset; + else + ucb->io_out &= ~(1 << offset); + ucb1x00_reg_write(ucb, UCB_IO_DATA, ucb->io_out); + spin_unlock_irqrestore(&ucb->io_lock, flags); + + return 0; +} + /* * UCB1300 data sheet says we must: * 1. enable ADC => 5us (including reference startup time) @@ -476,6 +530,7 @@ static int ucb1x00_probe(struct mcp *mcp) struct ucb1x00_driver *drv; unsigned int id; int ret = -ENODEV; + int temp; mcp_enable(mcp); id = mcp_reg_read(mcp, UCB_ID); @@ -508,12 +563,27 @@ static int ucb1x00_probe(struct mcp *mcp) goto err_free; } + ucb->gpio.base = -1; + if (mcp->gpio_base != 0) { + ucb->gpio.label = dev_name(&ucb->dev); + ucb->gpio.base = mcp->gpio_base; + ucb->gpio.ngpio = 10; + ucb->gpio.set = ucb1x00_gpio_set; + ucb->gpio.get = ucb1x00_gpio_get; + ucb->gpio.direction_input = ucb1x00_gpio_direction_input; + ucb->gpio.direction_output = ucb1x00_gpio_direction_output; + ret = gpiochip_add(&ucb->gpio); + if (ret) + goto err_free; + } else + dev_info(&ucb->dev, "gpio_base not set so no gpiolib support"); + ret = request_irq(ucb->irq, ucb1x00_irq, IRQF_TRIGGER_RISING, "UCB1x00", ucb); if (ret) { printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n", ucb->irq, ret); - goto err_free; + goto err_gpio; } mcp_set_drvdata(mcp, ucb); @@ -522,6 +592,7 @@ static int ucb1x00_probe(struct mcp *mcp) if (ret) goto err_irq; + INIT_LIST_HEAD(&ucb->devs); mutex_lock(&ucb1x00_mutex); list_add(&ucb->node, &ucb1x00_devices); @@ -529,10 +600,14 @@ static int ucb1x00_probe(struct mcp *mcp) ucb1x00_add_dev(ucb, drv); } mutex_unlock(&ucb1x00_mutex); + goto out; err_irq: free_irq(ucb->irq, ucb); + err_gpio: + if (ucb->gpio.base != -1) + temp = gpiochip_remove(&ucb->gpio); err_free: kfree(ucb); err_disable: @@ -545,6 +620,7 @@ static void ucb1x00_remove(struct mcp *mcp) { struct ucb1x00 *ucb = mcp_get_drvdata(mcp); struct list_head *l, *n; + int ret; mutex_lock(&ucb1x00_mutex); list_del(&ucb->node); @@ -554,6 +630,12 @@ static void ucb1x00_remove(struct mcp *mcp) } mutex_unlock(&ucb1x00_mutex); + if (ucb->gpio.base != -1) { + ret = gpiochip_remove(&ucb->gpio); + if (ret) + dev_err(&ucb->dev, "Can't remove gpio chip: %d\n", ret); + } + free_irq(ucb->irq, ucb); device_unregister(&ucb->dev); } @@ -604,6 +686,7 @@ static int ucb1x00_resume(struct mcp *mcp) struct ucb1x00 *ucb = mcp_get_drvdata(mcp); struct ucb1x00_dev *dev; + ucb1x00_reg_write(ucb, UCB_IO_DIR, ucb->io_dir); mutex_lock(&ucb1x00_mutex); list_for_each_entry(dev, &ucb->devs, dev_node) { if (dev->drv->resume) diff --git a/include/linux/mfd/mcp.h b/include/linux/mfd/mcp.h index be95e09..ee496708 100644 --- a/include/linux/mfd/mcp.h +++ b/include/linux/mfd/mcp.h @@ -26,6 +26,7 @@ struct mcp { dma_device_t dma_telco_rd; dma_device_t dma_telco_wr; struct device attached_device; + int gpio_base; }; struct mcp_ops { diff --git a/include/linux/mfd/ucb1x00.h b/include/linux/mfd/ucb1x00.h index eac3463..aa9c378 100644 --- a/include/linux/mfd/ucb1x00.h +++ b/include/linux/mfd/ucb1x00.h @@ -11,6 +11,8 @@ #define UCB1200_H #include +#include + #define UCB_IO_DATA 0x00 #define UCB_IO_DIR 0x01 @@ -123,6 +125,7 @@ struct ucb1x00 { struct device dev; struct list_head node; struct list_head devs; + struct gpio_chip gpio; }; struct ucb1x00_driver; -- cgit v0.10.2 From f7177c8452618df34ecdcd7b6f2cb941aec0ffc3 Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 14:12:02 +0100 Subject: collie: convert to gpiolib for ucb1x00 Only the parts used for collie_battery are converted. The rest will be cleaned up later. diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index 9f5029c..395cf09 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -86,6 +86,7 @@ static struct scoop_pcmcia_config collie_pcmcia_config = { static struct mcp_plat_data collie_mcp_data = { .mccr0 = MCCR0_ADM | MCCR0_ExtClk, .sclk_rate = 9216000, + .gpio_base = COLLIE_TC35143_GPIO_BASE, }; #ifdef CONFIG_SHARP_LOCOMO diff --git a/arch/arm/mach-sa1100/include/mach/collie.h b/arch/arm/mach-sa1100/include/mach/collie.h index 8c8fe46..71a0b3f 100644 --- a/arch/arm/mach-sa1100/include/mach/collie.h +++ b/arch/arm/mach-sa1100/include/mach/collie.h @@ -25,10 +25,10 @@ #define COLLIE_GPIO_VPEN (COLLIE_SCOOP_GPIO_BASE + 7) #define COLLIE_SCP_LB_VOL_CHG SCOOP_GPCR_PA19 -#define COLLIE_SCOOP_IO_DIR ( COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R | \ +#define COLLIE_SCOOP_IO_DIR (COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R | \ COLLIE_SCP_5VON | COLLIE_SCP_AMP_ON | \ - COLLIE_SCP_LB_VOL_CHG ) -#define COLLIE_SCOOP_IO_OUT ( COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R ) + COLLIE_SCP_LB_VOL_CHG) +#define COLLIE_SCOOP_IO_OUT (COLLIE_SCP_MUTE_L | COLLIE_SCP_MUTE_R) /* GPIOs for gpiolib */ @@ -80,19 +80,20 @@ #define COLLIE_LCM_IRQ_GPIO_nSD_WP IRQ_LOCOMO_GPIO14 /* GPIO's on the TC35143AF (Toshiba Analog Frontend) */ -#define COLLIE_TC35143_GPIO_VERSION0 UCB_IO_0 /* GPIO0=Version */ -#define COLLIE_TC35143_GPIO_TBL_CHK UCB_IO_1 /* GPIO1=TBL_CHK */ -#define COLLIE_TC35143_GPIO_VPEN_ON UCB_IO_2 /* GPIO2=VPNE_ON */ -#define COLLIE_TC35143_GPIO_IR_ON UCB_IO_3 /* GPIO3=IR_ON */ -#define COLLIE_TC35143_GPIO_AMP_ON UCB_IO_4 /* GPIO4=AMP_ON */ -#define COLLIE_TC35143_GPIO_VERSION1 UCB_IO_5 /* GPIO5=Version */ -#define COLLIE_TC35143_GPIO_FS8KLPF UCB_IO_5 /* GPIO5=fs 8k LPF */ -#define COLLIE_TC35143_GPIO_BUZZER_BIAS UCB_IO_6 /* GPIO6=BUZZER BIAS */ -#define COLLIE_TC35143_GPIO_MBAT_ON UCB_IO_7 /* GPIO7=MBAT_ON */ -#define COLLIE_TC35143_GPIO_BBAT_ON UCB_IO_8 /* GPIO8=BBAT_ON */ -#define COLLIE_TC35143_GPIO_TMP_ON UCB_IO_9 /* GPIO9=TMP_ON */ -#define COLLIE_TC35143_GPIO_IN ( UCB_IO_0 | UCB_IO_2 | UCB_IO_5 ) -#define COLLIE_TC35143_GPIO_OUT ( UCB_IO_1 | UCB_IO_3 | UCB_IO_4 | UCB_IO_6 | \ - UCB_IO_7 | UCB_IO_8 | UCB_IO_9 ) +#define COLLIE_TC35143_GPIO_BASE (GPIO_MAX + 13) +#define COLLIE_TC35143_GPIO_VERSION0 UCB_IO_0 +#define COLLIE_TC35143_GPIO_TBL_CHK UCB_IO_1 +#define COLLIE_TC35143_GPIO_VPEN_ON UCB_IO_2 +#define COLLIE_TC35143_GPIO_IR_ON UCB_IO_3 +#define COLLIE_TC35143_GPIO_AMP_ON UCB_IO_4 +#define COLLIE_TC35143_GPIO_VERSION1 UCB_IO_5 +#define COLLIE_TC35143_GPIO_FS8KLPF UCB_IO_5 +#define COLLIE_TC35143_GPIO_BUZZER_BIAS UCB_IO_6 +#define COLLIE_GPIO_MBAT_ON (COLLIE_TC35143_GPIO_BASE + 7) +#define COLLIE_GPIO_BBAT_ON (COLLIE_TC35143_GPIO_BASE + 8) +#define COLLIE_GPIO_TMP_ON (COLLIE_TC35143_GPIO_BASE + 9) +#define COLLIE_TC35143_GPIO_IN (UCB_IO_0 | UCB_IO_2 | UCB_IO_5) +#define COLLIE_TC35143_GPIO_OUT (UCB_IO_1 | UCB_IO_3 | UCB_IO_4 \ + | UCB_IO_6) #endif -- cgit v0.10.2 From f1fce597e68c91f04381ad869579fd2fe6064101 Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 14:12:29 +0100 Subject: collie: add battery driver This driver is based on tosa_battery.c. diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index cea6cef..1186749 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -77,6 +77,13 @@ config BATTERY_TOSA Say Y to enable support for the battery on the Sharp Zaurus SL-6000 (tosa) models. +config BATTERY_COLLIE + tristate "Sharp SL-5500 (collie) battery" + depends on SA1100_COLLIE && MCP_UCB1200 + help + Say Y to enable support for the battery on the Sharp Zaurus + SL-5500 (collie) models. + config BATTERY_WM97XX bool "WM97xx generic battery driver" depends on TOUCHSCREEN_WM97XX=y diff --git a/drivers/power/Makefile b/drivers/power/Makefile index b96f29d..356cdfd 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_BATTERY_DS2782) += ds2782_battery.o obj-$(CONFIG_BATTERY_PMU) += pmu_battery.o obj-$(CONFIG_BATTERY_OLPC) += olpc_battery.o obj-$(CONFIG_BATTERY_TOSA) += tosa_battery.o +obj-$(CONFIG_BATTERY_COLLIE) += collie_battery.o obj-$(CONFIG_BATTERY_WM97XX) += wm97xx_battery.o obj-$(CONFIG_BATTERY_BQ27x00) += bq27x00_battery.o obj-$(CONFIG_BATTERY_DA9030) += da9030_battery.o diff --git a/drivers/power/collie_battery.c b/drivers/power/collie_battery.c new file mode 100644 index 0000000..039f41a --- /dev/null +++ b/drivers/power/collie_battery.c @@ -0,0 +1,418 @@ +/* + * Battery and Power Management code for the Sharp SL-5x00 + * + * Copyright (C) 2009 Thomas Kunze + * + * based on tosa_battery.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static DEFINE_MUTEX(bat_lock); /* protects gpio pins */ +static struct work_struct bat_work; +static struct ucb1x00 *ucb; + +struct collie_bat { + int status; + struct power_supply psy; + int full_chrg; + + struct mutex work_lock; /* protects data */ + + bool (*is_present)(struct collie_bat *bat); + int gpio_full; + int gpio_charge_on; + + int technology; + + int gpio_bat; + int adc_bat; + int adc_bat_divider; + int bat_max; + int bat_min; + + int gpio_temp; + int adc_temp; + int adc_temp_divider; +}; + +static struct collie_bat collie_bat_main; + +static unsigned long collie_read_bat(struct collie_bat *bat) +{ + unsigned long value = 0; + + if (bat->gpio_bat < 0 || bat->adc_bat < 0) + return 0; + mutex_lock(&bat_lock); + gpio_set_value(bat->gpio_bat, 1); + msleep(5); + ucb1x00_adc_enable(ucb); + value = ucb1x00_adc_read(ucb, bat->adc_bat, UCB_SYNC); + ucb1x00_adc_disable(ucb); + gpio_set_value(bat->gpio_bat, 0); + mutex_unlock(&bat_lock); + value = value * 1000000 / bat->adc_bat_divider; + + return value; +} + +static unsigned long collie_read_temp(struct collie_bat *bat) +{ + unsigned long value = 0; + if (bat->gpio_temp < 0 || bat->adc_temp < 0) + return 0; + + mutex_lock(&bat_lock); + gpio_set_value(bat->gpio_temp, 1); + msleep(5); + ucb1x00_adc_enable(ucb); + value = ucb1x00_adc_read(ucb, bat->adc_temp, UCB_SYNC); + ucb1x00_adc_disable(ucb); + gpio_set_value(bat->gpio_temp, 0); + mutex_unlock(&bat_lock); + + value = value * 10000 / bat->adc_temp_divider; + + return value; +} + +static int collie_bat_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + int ret = 0; + struct collie_bat *bat = container_of(psy, struct collie_bat, psy); + + if (bat->is_present && !bat->is_present(bat) + && psp != POWER_SUPPLY_PROP_PRESENT) { + return -ENODEV; + } + + switch (psp) { + case POWER_SUPPLY_PROP_STATUS: + val->intval = bat->status; + break; + case POWER_SUPPLY_PROP_TECHNOLOGY: + val->intval = bat->technology; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + val->intval = collie_read_bat(bat); + break; + case POWER_SUPPLY_PROP_VOLTAGE_MAX: + if (bat->full_chrg == -1) + val->intval = bat->bat_max; + else + val->intval = bat->full_chrg; + break; + case POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN: + val->intval = bat->bat_max; + break; + case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN: + val->intval = bat->bat_min; + break; + case POWER_SUPPLY_PROP_TEMP: + val->intval = collie_read_temp(bat); + break; + case POWER_SUPPLY_PROP_PRESENT: + val->intval = bat->is_present ? bat->is_present(bat) : 1; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static void collie_bat_external_power_changed(struct power_supply *psy) +{ + schedule_work(&bat_work); +} + +static irqreturn_t collie_bat_gpio_isr(int irq, void *data) +{ + pr_info("collie_bat_gpio irq: %d\n", gpio_get_value(irq_to_gpio(irq))); + schedule_work(&bat_work); + return IRQ_HANDLED; +} + +static void collie_bat_update(struct collie_bat *bat) +{ + int old; + struct power_supply *psy = &bat->psy; + + mutex_lock(&bat->work_lock); + + old = bat->status; + + if (bat->is_present && !bat->is_present(bat)) { + printk(KERN_NOTICE "%s not present\n", psy->name); + bat->status = POWER_SUPPLY_STATUS_UNKNOWN; + bat->full_chrg = -1; + } else if (power_supply_am_i_supplied(psy)) { + if (bat->status == POWER_SUPPLY_STATUS_DISCHARGING) { + gpio_set_value(bat->gpio_charge_on, 1); + mdelay(15); + } + + if (gpio_get_value(bat->gpio_full)) { + if (old == POWER_SUPPLY_STATUS_CHARGING || + bat->full_chrg == -1) + bat->full_chrg = collie_read_bat(bat); + + gpio_set_value(bat->gpio_charge_on, 0); + bat->status = POWER_SUPPLY_STATUS_FULL; + } else { + gpio_set_value(bat->gpio_charge_on, 1); + bat->status = POWER_SUPPLY_STATUS_CHARGING; + } + } else { + gpio_set_value(bat->gpio_charge_on, 0); + bat->status = POWER_SUPPLY_STATUS_DISCHARGING; + } + + if (old != bat->status) + power_supply_changed(psy); + + mutex_unlock(&bat->work_lock); +} + +static void collie_bat_work(struct work_struct *work) +{ + collie_bat_update(&collie_bat_main); +} + + +static enum power_supply_property collie_bat_main_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_MAX, + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_TEMP, +}; + +static enum power_supply_property collie_bat_bu_props[] = { + POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_TECHNOLOGY, + POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN, + POWER_SUPPLY_PROP_VOLTAGE_MAX, + POWER_SUPPLY_PROP_PRESENT, +}; + +static struct collie_bat collie_bat_main = { + .status = POWER_SUPPLY_STATUS_DISCHARGING, + .full_chrg = -1, + .psy = { + .name = "main-battery", + .type = POWER_SUPPLY_TYPE_BATTERY, + .properties = collie_bat_main_props, + .num_properties = ARRAY_SIZE(collie_bat_main_props), + .get_property = collie_bat_get_property, + .external_power_changed = collie_bat_external_power_changed, + .use_for_apm = 1, + }, + + .gpio_full = COLLIE_GPIO_CO, + .gpio_charge_on = COLLIE_GPIO_CHARGE_ON, + + .technology = POWER_SUPPLY_TECHNOLOGY_LIPO, + + .gpio_bat = COLLIE_GPIO_MBAT_ON, + .adc_bat = UCB_ADC_INP_AD1, + .adc_bat_divider = 155, + .bat_max = 4310000, + .bat_min = 1551 * 1000000 / 414, + + .gpio_temp = COLLIE_GPIO_TMP_ON, + .adc_temp = UCB_ADC_INP_AD0, + .adc_temp_divider = 10000, +}; + +static struct collie_bat collie_bat_bu = { + .status = POWER_SUPPLY_STATUS_UNKNOWN, + .full_chrg = -1, + + .psy = { + .name = "backup-battery", + .type = POWER_SUPPLY_TYPE_BATTERY, + .properties = collie_bat_bu_props, + .num_properties = ARRAY_SIZE(collie_bat_bu_props), + .get_property = collie_bat_get_property, + .external_power_changed = collie_bat_external_power_changed, + }, + + .gpio_full = -1, + .gpio_charge_on = -1, + + .technology = POWER_SUPPLY_TECHNOLOGY_LiMn, + + .gpio_bat = COLLIE_GPIO_BBAT_ON, + .adc_bat = UCB_ADC_INP_AD1, + .adc_bat_divider = 155, + .bat_max = 3000000, + .bat_min = 1900000, + + .gpio_temp = -1, + .adc_temp = -1, + .adc_temp_divider = -1, +}; + +static struct { + int gpio; + char *name; + bool output; + int value; +} gpios[] = { + { COLLIE_GPIO_CO, "main battery full", 0, 0 }, + { COLLIE_GPIO_MAIN_BAT_LOW, "main battery low", 0, 0 }, + { COLLIE_GPIO_CHARGE_ON, "main charge on", 1, 0 }, + { COLLIE_GPIO_MBAT_ON, "main battery", 1, 0 }, + { COLLIE_GPIO_TMP_ON, "main battery temp", 1, 0 }, + { COLLIE_GPIO_BBAT_ON, "backup battery", 1, 0 }, +}; + +#ifdef CONFIG_PM +static int collie_bat_suspend(struct ucb1x00_dev *dev, pm_message_t state) +{ + /* flush all pending status updates */ + flush_scheduled_work(); + return 0; +} + +static int collie_bat_resume(struct ucb1x00_dev *dev) +{ + /* things may have changed while we were away */ + schedule_work(&bat_work); + return 0; +} +#else +#define collie_bat_suspend NULL +#define collie_bat_resume NULL +#endif + +static int __devinit collie_bat_probe(struct ucb1x00_dev *dev) +{ + int ret; + int i; + + if (!machine_is_collie()) + return -ENODEV; + + ucb = dev->ucb; + + for (i = 0; i < ARRAY_SIZE(gpios); i++) { + ret = gpio_request(gpios[i].gpio, gpios[i].name); + if (ret) { + i--; + goto err_gpio; + } + + if (gpios[i].output) + ret = gpio_direction_output(gpios[i].gpio, + gpios[i].value); + else + ret = gpio_direction_input(gpios[i].gpio); + + if (ret) + goto err_gpio; + } + + mutex_init(&collie_bat_main.work_lock); + + INIT_WORK(&bat_work, collie_bat_work); + + ret = power_supply_register(&dev->ucb->dev, &collie_bat_main.psy); + if (ret) + goto err_psy_reg_main; + ret = power_supply_register(&dev->ucb->dev, &collie_bat_bu.psy); + if (ret) + goto err_psy_reg_bu; + + ret = request_irq(gpio_to_irq(COLLIE_GPIO_CO), + collie_bat_gpio_isr, + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "main full", &collie_bat_main); + if (!ret) { + schedule_work(&bat_work); + return 0; + } + power_supply_unregister(&collie_bat_bu.psy); +err_psy_reg_bu: + power_supply_unregister(&collie_bat_main.psy); +err_psy_reg_main: + + /* see comment in collie_bat_remove */ + flush_scheduled_work(); + + i--; +err_gpio: + for (; i >= 0; i--) + gpio_free(gpios[i].gpio); + + return ret; +} + +static void __devexit collie_bat_remove(struct ucb1x00_dev *dev) +{ + int i; + + free_irq(gpio_to_irq(COLLIE_GPIO_CO), &collie_bat_main); + + power_supply_unregister(&collie_bat_bu.psy); + power_supply_unregister(&collie_bat_main.psy); + + /* + * now flush all pending work. + * we won't get any more schedules, since all + * sources (isr and external_power_changed) + * are unregistered now. + */ + flush_scheduled_work(); + + for (i = ARRAY_SIZE(gpios) - 1; i >= 0; i--) + gpio_free(gpios[i].gpio); +} + +static struct ucb1x00_driver collie_bat_driver = { + .add = collie_bat_probe, + .remove = __devexit_p(collie_bat_remove), + .suspend = collie_bat_suspend, + .resume = collie_bat_resume, +}; + +static int __init collie_bat_init(void) +{ + return ucb1x00_register_driver(&collie_bat_driver); +} + +static void __exit collie_bat_exit(void) +{ + ucb1x00_unregister_driver(&collie_bat_driver); +} + +module_init(collie_bat_init); +module_exit(collie_bat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Thomas Kunze"); +MODULE_DESCRIPTION("Collie battery driver"); -- cgit v0.10.2 From 9823b2d0f9547742fc40857f9ef153d6956266f2 Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Tue, 10 Feb 2009 13:48:32 +0100 Subject: collie: support pda_power driver This add the pda-power platform device to collie. diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index 395cf09..fdab9d9 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -89,6 +90,69 @@ static struct mcp_plat_data collie_mcp_data = { .gpio_base = COLLIE_TC35143_GPIO_BASE, }; +/* + * Collie AC IN + */ +static int collie_power_init(struct device *dev) +{ + int ret = gpio_request(COLLIE_GPIO_AC_IN, "ac in"); + if (ret) + goto err_gpio_req; + + ret = gpio_direction_input(COLLIE_GPIO_AC_IN); + if (ret) + goto err_gpio_in; + + return 0; + +err_gpio_in: + gpio_free(COLLIE_GPIO_AC_IN); +err_gpio_req: + return ret; +} + +static void collie_power_exit(struct device *dev) +{ + gpio_free(COLLIE_GPIO_AC_IN); +} + +static int collie_power_ac_online(void) +{ + return gpio_get_value(COLLIE_GPIO_AC_IN) == 2; +} + +static char *collie_ac_supplied_to[] = { + "main-battery", + "backup-battery", +}; + +static struct pda_power_pdata collie_power_data = { + .init = collie_power_init, + .is_ac_online = collie_power_ac_online, + .exit = collie_power_exit, + .supplied_to = collie_ac_supplied_to, + .num_supplicants = ARRAY_SIZE(collie_ac_supplied_to), +}; + +static struct resource collie_power_resource[] = { + { + .name = "ac", + .start = gpio_to_irq(COLLIE_GPIO_AC_IN), + .end = gpio_to_irq(COLLIE_GPIO_AC_IN), + .flags = IORESOURCE_IRQ | + IORESOURCE_IRQ_HIGHEDGE | + IORESOURCE_IRQ_LOWEDGE, + }, +}; + +static struct platform_device collie_power_device = { + .name = "pda-power", + .id = -1, + .dev.platform_data = &collie_power_data, + .resource = collie_power_resource, + .num_resources = ARRAY_SIZE(collie_power_resource), +}; + #ifdef CONFIG_SHARP_LOCOMO /* * low-level UART features. @@ -180,6 +244,7 @@ struct platform_device collie_locomo_device = { static struct platform_device *devices[] __initdata = { &collie_locomo_device, &colliescoop_device, + &collie_power_device, }; static struct mtd_partition collie_partitions[] = { -- cgit v0.10.2 From 956ffd027bedc4106b901eb6a50f0a6c6de4113d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:15:46 -0600 Subject: perf trace: Add scripting ops Adds an interface, scripting_ops, that when implemented for a particular scripting language enables built-in support for trace stream processing using that language. The interface is designed to enable full-fledged language interpreters to be embedded inside the perf executable and thereby make the full capabilities of the supported languages available for trace processing. See below for details on the interface. This patch also adds a couple command-line options to 'perf trace': The -s option option is used to specify the script to be run. Script names that can be used with -s take the form: [language spec:]scriptname[.ext] Scripting languages register a set of 'language specs' that can be used to specify scripts for the registered languages. The specs can be used either as prefixes or extensions. If [language spec:] is used, the script is taken as a script of the matching language regardless of any extension it might have. If [language spec:] is not used, [.ext] is used to look up the language it corresponds to. Language specs are case insensitive. e.g. Perl scripts can be specified in the following ways: Perl:scriptname pl:scriptname.py # extension ignored PL:scriptname scriptname.pl scriptname.perl The -g [language spec] option gives users an easy starting point for writing scripts in the specified language. Scripting support for a particular language can implement a generate_script() scripting op that outputs an empty (or near-empty) set of handlers for all the events contained in a given perf.data trace file - this option gives users a direct way to access that. Adding support for a scripting language --------------------------------------- The main thing that needs to be done do add support for a new language is to implement the scripting_ops interface: It consists of the following four functions: start_script() stop_script() process_event() generate_script() start_script() is called before any events are processed, and is meant to give the scripting language support an opportunity to set things up to receive events e.g. create and initialize an instance of a language interpreter. stop_script() is called after all events are processed, and is meant to give the scripting language support an opportunity to clean up e.g. destroy the interpreter instance, etc. process_event() is called once for each event and takes as its main parameter a pointer to the binary trace event record to be processed. The implementation is responsible for picking out the binary fields from the event record and sending them to the script handler function associated with that event e.g. a function derived from the event name it's meant to handle e.g. 'sched::sched_switch()'. The 'format' information for trace events can be used to parse the binary data and map it into a form usable by a given scripting language; see the Perl implemention in subsequent patches for one possible way to leverage the existing trace format parsing code in perf and map that info into specific scripting language types. generate_script() should generate a ready-to-run script for the current set of events in the trace, preferably with bodies that print out every field for each event. Again, look at the Perl implementation for clues as to how that can be done. This is an optional, but very useful op. Support for a given language should also add a language-specific setup function and call it from setup_scripting(). The language-specific setup function associates the the scripting ops for that language with one or more 'language specifiers' (see below) using script_spec_register(). When a script name is specified on the command line, the scripting ops associated with the specified language are used to instantiate and use the appropriate interpreter to process the trace stream. In general, it should be relatively easy to add support for a new language, especially if the language implementation supports an interface allowing an interpreter to be 'embedded' inside another program (in this case the containing program will be 'perf trace'). If so, it should be relatively straightforward to translate trace events into invocations of user-defined script functions where e.g. the function name corresponds to the event type and the function parameters correspond to the event fields. The event and field type information exported by the event tracing infrastructure (via the event 'format' files) should be enough to parse and send any piece of trace data to the user script. The easiest way to see how this can be done would be to look at the Perl implementation contained in perf/util/trace-event-perl.c/.h. There are a couple of other things that aren't covered by the scripting_ops or setup interface and are technically optional, but should be implemented if possible. One of these is support for 'flag' and 'symbolic' fields e.g. being able to use more human-readable values such as 'GFP_KERNEL' or HI/BLOCK_IOPOLL/TASKLET in place of raw flag values. See the Perl implementation to see how this can be done. The other thing is support for 'calling back' into the perf executable to access e.g. uncommon fields not passed by default into handler functions, or any metadata the implementation might want to make available to users via the language interface. Again, see the Perl implementation for examples. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259133352-23685-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a775025..e96bb53 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -6,6 +6,46 @@ #include "util/thread.h" #include "util/header.h" +static char const *script_name; +static char const *generate_script_lang; + +static int default_start_script(const char *script __attribute((unused))) +{ + return 0; +} + +static int default_stop_script(void) +{ + return 0; +} + +static int default_generate_script(const char *outfile __attribute ((unused))) +{ + return 0; +} + +static struct scripting_ops default_scripting_ops = { + .start_script = default_start_script, + .stop_script = default_stop_script, + .process_event = print_event, + .generate_script = default_generate_script, +}; + +static struct scripting_ops *scripting_ops; + +static void setup_scripting(void) +{ + /* make sure PERF_EXEC_PATH is set for scripts */ + perf_set_argv_exec_path(perf_exec_path()); + + scripting_ops = &default_scripting_ops; +} + +static int cleanup_scripting(void) +{ + return scripting_ops->stop_script(); +} + #include "util/parse-options.h" #include "perf.h" @@ -13,11 +53,12 @@ #include "util/trace-event.h" #include "util/data_map.h" +#include "util/exec_cmd.h" -static char const *input_name = "perf.data"; +static char const *input_name = "perf.data"; -static struct perf_header *header; -static u64 sample_type; +static struct perf_header *header; +static u64 sample_type; static int process_sample_event(event_t *event) { @@ -69,7 +110,8 @@ static int process_sample_event(event_t *event) * field, although it should be the same than this perf * event pid */ - print_event(cpu, raw->data, raw->size, timestamp, thread->comm); + scripting_ops->process_event(cpu, raw->data, raw->size, + timestamp, thread->comm); } event__stats.total += period; @@ -105,6 +147,154 @@ static int __cmd_trace(void) 0, 0, &event__cwdlen, &event__cwd); } +struct script_spec { + struct list_head node; + struct scripting_ops *ops; + char spec[0]; +}; + +LIST_HEAD(script_specs); + +static struct script_spec *script_spec__new(const char *spec, + struct scripting_ops *ops) +{ + struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1); + + if (s != NULL) { + strcpy(s->spec, spec); + s->ops = ops; + } + + return s; +} + +static void script_spec__delete(struct script_spec *s) +{ + free(s->spec); + free(s); +} + +static void script_spec__add(struct script_spec *s) +{ + list_add_tail(&s->node, &script_specs); +} + +static struct script_spec *script_spec__find(const char *spec) +{ + struct script_spec *s; + + list_for_each_entry(s, &script_specs, node) + if (strcasecmp(s->spec, spec) == 0) + return s; + return NULL; +} + +static struct script_spec *script_spec__findnew(const char *spec, + struct scripting_ops *ops) +{ + struct script_spec *s = script_spec__find(spec); + + if (s) + return s; + + s = script_spec__new(spec, ops); + if (!s) + goto out_delete_spec; + + script_spec__add(s); + + return s; + +out_delete_spec: + script_spec__delete(s); + + return NULL; +} + +int script_spec_register(const char *spec, struct scripting_ops *ops) +{ + struct script_spec *s; + + s = script_spec__find(spec); + if (s) + return -1; + + s = script_spec__findnew(spec, ops); + if (!s) + return -1; + + return 0; +} + +static struct scripting_ops *script_spec__lookup(const char *spec) +{ + struct script_spec *s = script_spec__find(spec); + if (!s) + return NULL; + + return s->ops; +} + +static void list_available_languages(void) +{ + struct script_spec *s; + + fprintf(stderr, "\n"); + fprintf(stderr, "Scripting language extensions (used in " + "perf trace -s [spec:]script.[spec]):\n\n"); + + list_for_each_entry(s, &script_specs, node) + fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name); + + fprintf(stderr, "\n"); +} + +static int parse_scriptname(const struct option *opt __used, + const char *str, int unset __used) +{ + char spec[PATH_MAX]; + const char *script, *ext; + int len; + + if (strcmp(str, "list") == 0) { + list_available_languages(); + return 0; + } + + script = strchr(str, ':'); + if (script) { + len = script - str; + if (len >= PATH_MAX) { + fprintf(stderr, "invalid language specifier"); + return -1; + } + strncpy(spec, str, len); + spec[len] = '\0'; + scripting_ops = script_spec__lookup(spec); + if (!scripting_ops) { + fprintf(stderr, "invalid language specifier"); + return -1; + } + script++; + } else { + script = str; + ext = strchr(script, '.'); + if (!ext) { + fprintf(stderr, "invalid script extension"); + return -1; + } + scripting_ops = script_spec__lookup(++ext); + if (!scripting_ops) { + fprintf(stderr, "invalid script extension"); + return -1; + } + } + + script_name = strdup(script); + + return 0; +} + static const char * const annotate_usage[] = { "perf trace [] ", NULL @@ -117,13 +307,23 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('l', "latency", &latency_format, "show latency attributes (irqs/preemption disabled, etc)"), + OPT_CALLBACK('s', "script", NULL, "name", + "script file name (lang:script name, script name, or *)", + parse_scriptname), + OPT_STRING('g', "gen-script", &generate_script_lang, "lang", + "generate perf-trace.xx script in specified language"), + OPT_END() }; int cmd_trace(int argc, const char **argv, const char *prefix __used) { + int err; + symbol__init(0); + setup_scripting(); + argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { /* @@ -136,5 +336,50 @@ int cmd_trace(int argc, const char **argv, const char *prefix __used) setup_pager(); - return __cmd_trace(); + if (generate_script_lang) { + struct stat perf_stat; + + int input = open(input_name, O_RDONLY); + if (input < 0) { + perror("failed to open file"); + exit(-1); + } + + err = fstat(input, &perf_stat); + if (err < 0) { + perror("failed to stat file"); + exit(-1); + } + + if (!perf_stat.st_size) { + fprintf(stderr, "zero-sized file, nothing to do!\n"); + exit(0); + } + + scripting_ops = script_spec__lookup(generate_script_lang); + if (!scripting_ops) { + fprintf(stderr, "invalid language specifier"); + return -1; + } + + header = perf_header__new(); + if (header == NULL) + return -1; + + perf_header__read(header, input); + err = scripting_ops->generate_script("perf-trace"); + goto out; + } + + if (script_name) { + err = scripting_ops->start_script(script_name); + if (err) + goto out; + } + + err = __cmd_trace(); + + cleanup_scripting(); +out: + return err; } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index dd51c687..e7aaf00 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -259,4 +259,15 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, }; +struct scripting_ops { + const char *name; + int (*start_script) (const char *); + int (*stop_script) (void); + void (*process_event) (int cpu, void *data, int size, + unsigned long long nsecs, char *comm); + int (*generate_script) (const char *outfile); +}; + +int script_spec_register(const char *spec, struct scripting_ops *ops); + #endif /* __PERF_TRACE_EVENTS_H */ -- cgit v0.10.2 From eb9a42caa7a926beb935a22bc59d981b35f0b652 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:15:47 -0600 Subject: perf trace: Add flag/symbolic format_flags It's useful to know whether a field is a flag or symbolic field for e.g. when generating scripts - it allows us to translate those fields specially rather than literally as plain numeric values. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259133352-23685-3-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 7021dc1..85d7163 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -48,6 +48,11 @@ static unsigned long long input_buf_siz; static int cpus; static int long_size; +static int is_flag_field; +static int is_symbolic_field; + +static struct format_field * +find_any_field(struct event *event, const char *name); static void init_input_buf(char *buf, unsigned long long size) { @@ -1301,6 +1306,16 @@ process_entry(struct event *event __unused, struct print_arg *arg, arg->type = PRINT_FIELD; arg->field.name = field; + if (is_flag_field) { + arg->field.field = find_any_field(event, arg->field.name); + arg->field.field->flags |= FIELD_IS_FLAG; + is_flag_field = 0; + } else if (is_symbolic_field) { + arg->field.field = find_any_field(event, arg->field.name); + arg->field.field->flags |= FIELD_IS_SYMBOLIC; + is_symbolic_field = 0; + } + type = read_token(&token); *tok = token; @@ -1668,9 +1683,11 @@ process_arg_token(struct event *event, struct print_arg *arg, type = process_entry(event, arg, &token); } else if (strcmp(token, "__print_flags") == 0) { free_token(token); + is_flag_field = 1; type = process_flags(event, arg, &token); } else if (strcmp(token, "__print_symbolic") == 0) { free_token(token); + is_symbolic_field = 1; type = process_symbols(event, arg, &token); } else if (strcmp(token, "__get_str") == 0) { free_token(token); diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index e7aaf00..aeb9157 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -29,6 +29,8 @@ enum format_flags { FIELD_IS_SIGNED = 4, FIELD_IS_STRING = 8, FIELD_IS_DYNAMIC = 16, + FIELD_IS_FLAG = 32, + FIELD_IS_SYMBOLIC = 64, }; struct format_field { -- cgit v0.10.2 From 16c632de64a74644a46e7636db26b2cfb530ca13 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:15:48 -0600 Subject: perf trace: Add Perl scripting support Implement trace_scripting_ops to make Perl a supported perf trace scripting language. Additionally adds code that allows Perl trace scripts to access the 'flag' and 'symbolic' (__print_flags(), __print_symbolic()) field information parsed from the trace format files. Also adds the Perl implementation of the generate_script() trace_scripting_op, which creates a ready-to-run perf trace Perl script based on existing trace data. Scripts generated by this implementation print out all the fields for each event mentioned in perf.data (and will detect and generate the proper scripting code for 'flag' and 'symbolic' fields), and will additionally generate handlers for the special 'trace_unhandled', 'trace_begin' and 'trace_end' handlers. Script authors can simply remove the printing code to implement their own custom event handling. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259133352-23685-4-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f1537a9..19e37cd 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -407,6 +407,7 @@ LIB_OBJS += util/thread.o LIB_OBJS += util/trace-event-parse.o LIB_OBJS += util/trace-event-read.o LIB_OBJS += util/trace-event-info.o +LIB_OBJS += util/trace-event-perl.o LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o @@ -489,6 +490,15 @@ else LIB_OBJS += util/probe-finder.o endif +PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts` +PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts` + +ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) + BASIC_CFLAGS += -DNO_LIBPERL +else + ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) +endif + ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else @@ -860,6 +870,9 @@ util/hweight.o: ../../lib/hweight.c PERF-CFLAGS util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter $< + perf-%$X: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e96bb53..ca8ebf1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -38,6 +38,8 @@ static void setup_scripting(void) /* make sure PERF_EXEC_PATH is set for scripts */ perf_set_argv_exec_path(perf_exec_path()); + setup_perl_scripting(); + scripting_ops = &default_scripting_ops; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 85d7163..1f16495 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1888,7 +1888,7 @@ find_any_field(struct event *event, const char *name) return find_field(event, name); } -static unsigned long long read_size(void *ptr, int size) +unsigned long long read_size(void *ptr, int size) { switch (size) { case 1: @@ -1973,7 +1973,7 @@ int trace_parse_common_type(void *data) "common_type"); } -static int parse_common_pid(void *data) +int trace_parse_common_pid(void *data) { static int pid_offset; static int pid_size; @@ -2025,6 +2025,14 @@ struct event *trace_find_event(int id) return event; } +struct event *trace_find_next_event(struct event *event) +{ + if (!event) + return event_list; + + return event->next; +} + static unsigned long long eval_num_arg(void *data, int size, struct event *event, struct print_arg *arg) { @@ -2164,7 +2172,7 @@ static const struct flag flags[] = { { "HRTIMER_RESTART", 1 }, }; -static unsigned long long eval_flag(const char *flag) +unsigned long long eval_flag(const char *flag) { int i; @@ -2694,7 +2702,7 @@ get_return_for_leaf(int cpu, int cur_pid, unsigned long long cur_func, if (!(event->flags & EVENT_FL_ISFUNCRET)) return NULL; - pid = parse_common_pid(next->data); + pid = trace_parse_common_pid(next->data); field = find_field(event, "func"); if (!field) die("function return does not have field func"); @@ -2980,7 +2988,7 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs, return; } - pid = parse_common_pid(data); + pid = trace_parse_common_pid(data); if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET)) return pretty_print_func_graph(data, size, event, cpu, diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c new file mode 100644 index 0000000..c56b08d --- /dev/null +++ b/tools/perf/util/trace-event-perl.c @@ -0,0 +1,552 @@ +/* + * trace-event-perl. Feed perf trace events to an embedded Perl interpreter. + * + * Copyright (C) 2009 Tom Zanussi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include + +#include "../perf.h" +#include "util.h" +#include "trace-event.h" +#include "trace-event-perl.h" + +INTERP my_perl; + +#define FTRACE_MAX_EVENT \ + ((1 << (sizeof(unsigned short) * 8)) - 1) + +struct event *events[FTRACE_MAX_EVENT]; + +static struct scripting_context *scripting_context; + +static char *cur_field_name; +static int zero_flag_atom; + +static void define_symbolic_value(const char *ev_name, + const char *field_name, + const char *field_value, + const char *field_str) +{ + unsigned long long value; + dSP; + + value = eval_flag(field_value); + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(ev_name, 0))); + XPUSHs(sv_2mortal(newSVpv(field_name, 0))); + XPUSHs(sv_2mortal(newSVuv(value))); + XPUSHs(sv_2mortal(newSVpv(field_str, 0))); + + PUTBACK; + if (get_cv("main::define_symbolic_value", 0)) + call_pv("main::define_symbolic_value", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void define_symbolic_values(struct print_flag_sym *field, + const char *ev_name, + const char *field_name) +{ + define_symbolic_value(ev_name, field_name, field->value, field->str); + if (field->next) + define_symbolic_values(field->next, ev_name, field_name); +} + +static void define_symbolic_field(const char *ev_name, + const char *field_name) +{ + dSP; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(ev_name, 0))); + XPUSHs(sv_2mortal(newSVpv(field_name, 0))); + + PUTBACK; + if (get_cv("main::define_symbolic_field", 0)) + call_pv("main::define_symbolic_field", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void define_flag_value(const char *ev_name, + const char *field_name, + const char *field_value, + const char *field_str) +{ + unsigned long long value; + dSP; + + value = eval_flag(field_value); + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(ev_name, 0))); + XPUSHs(sv_2mortal(newSVpv(field_name, 0))); + XPUSHs(sv_2mortal(newSVuv(value))); + XPUSHs(sv_2mortal(newSVpv(field_str, 0))); + + PUTBACK; + if (get_cv("main::define_flag_value", 0)) + call_pv("main::define_flag_value", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void define_flag_values(struct print_flag_sym *field, + const char *ev_name, + const char *field_name) +{ + define_flag_value(ev_name, field_name, field->value, field->str); + if (field->next) + define_flag_values(field->next, ev_name, field_name); +} + +static void define_flag_field(const char *ev_name, + const char *field_name, + const char *delim) +{ + dSP; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(ev_name, 0))); + XPUSHs(sv_2mortal(newSVpv(field_name, 0))); + XPUSHs(sv_2mortal(newSVpv(delim, 0))); + + PUTBACK; + if (get_cv("main::define_flag_field", 0)) + call_pv("main::define_flag_field", G_SCALAR); + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void define_event_symbols(struct event *event, + const char *ev_name, + struct print_arg *args) +{ + switch (args->type) { + case PRINT_NULL: + break; + case PRINT_ATOM: + define_flag_value(ev_name, cur_field_name, "0", + args->atom.atom); + zero_flag_atom = 0; + break; + case PRINT_FIELD: + if (cur_field_name) + free(cur_field_name); + cur_field_name = strdup(args->field.name); + break; + case PRINT_FLAGS: + define_event_symbols(event, ev_name, args->flags.field); + define_flag_field(ev_name, cur_field_name, args->flags.delim); + define_flag_values(args->flags.flags, ev_name, cur_field_name); + break; + case PRINT_SYMBOL: + define_event_symbols(event, ev_name, args->symbol.field); + define_symbolic_field(ev_name, cur_field_name); + define_symbolic_values(args->symbol.symbols, ev_name, + cur_field_name); + break; + case PRINT_STRING: + break; + case PRINT_TYPE: + define_event_symbols(event, ev_name, args->typecast.item); + break; + case PRINT_OP: + if (strcmp(args->op.op, ":") == 0) + zero_flag_atom = 1; + define_event_symbols(event, ev_name, args->op.left); + define_event_symbols(event, ev_name, args->op.right); + break; + default: + /* we should warn... */ + return; + } + + if (args->next) + define_event_symbols(event, ev_name, args->next); +} + +static inline struct event *find_cache_event(int type) +{ + static char ev_name[256]; + struct event *event; + + if (events[type]) + return events[type]; + + events[type] = event = trace_find_event(type); + if (!event) + return NULL; + + sprintf(ev_name, "%s::%s", event->system, event->name); + + define_event_symbols(event, ev_name, event->print_fmt.args); + + return event; +} + +static void perl_process_event(int cpu, void *data, + int size __attribute((unused)), + unsigned long long nsecs, char *comm) +{ + struct format_field *field; + static char handler[256]; + unsigned long long val; + unsigned long s, ns; + struct event *event; + int type; + int pid; + + dSP; + + type = trace_parse_common_type(data); + + event = find_cache_event(type); + if (!event) + die("ug! no event found for type %d", type); + + pid = trace_parse_common_pid(data); + + sprintf(handler, "%s::%s", event->system, event->name); + + s = nsecs / NSECS_PER_SEC; + ns = nsecs - s * NSECS_PER_SEC; + + scripting_context->event_data = data; + + ENTER; + SAVETMPS; + PUSHMARK(SP); + + XPUSHs(sv_2mortal(newSVpv(handler, 0))); + XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context)))); + XPUSHs(sv_2mortal(newSVuv(cpu))); + XPUSHs(sv_2mortal(newSVuv(s))); + XPUSHs(sv_2mortal(newSVuv(ns))); + XPUSHs(sv_2mortal(newSViv(pid))); + XPUSHs(sv_2mortal(newSVpv(comm, 0))); + + /* common fields other than pid can be accessed via xsub fns */ + + for (field = event->format.fields; field; field = field->next) { + if (field->flags & FIELD_IS_STRING) { + int offset; + if (field->flags & FIELD_IS_DYNAMIC) { + offset = *(int *)(data + field->offset); + offset &= 0xffff; + } else + offset = field->offset; + XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); + } else { /* FIELD_IS_NUMERIC */ + val = read_size(data + field->offset, field->size); + if (field->flags & FIELD_IS_SIGNED) { + XPUSHs(sv_2mortal(newSViv(val))); + } else { + XPUSHs(sv_2mortal(newSVuv(val))); + } + } + } + + PUTBACK; + if (get_cv(handler, 0)) + call_pv(handler, G_SCALAR); + else if (get_cv("main::trace_unhandled", 0)) { + XPUSHs(sv_2mortal(newSVpv(handler, 0))); + XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context)))); + XPUSHs(sv_2mortal(newSVuv(cpu))); + XPUSHs(sv_2mortal(newSVuv(nsecs))); + XPUSHs(sv_2mortal(newSViv(pid))); + XPUSHs(sv_2mortal(newSVpv(comm, 0))); + call_pv("main::trace_unhandled", G_SCALAR); + } + SPAGAIN; + PUTBACK; + FREETMPS; + LEAVE; +} + +static void run_start_sub(void) +{ + dSP; /* access to Perl stack */ + PUSHMARK(SP); + + if (get_cv("main::trace_begin", 0)) + call_pv("main::trace_begin", G_DISCARD | G_NOARGS); +} + +/* + * Start trace script + */ +static int perl_start_script(const char *script) +{ + const char *command_line[2] = { "", NULL }; + + command_line[1] = script; + + my_perl = perl_alloc(); + perl_construct(my_perl); + + if (perl_parse(my_perl, NULL, 2, (char **)command_line, (char **)NULL)) + return -1; + + perl_run(my_perl); + if (SvTRUE(ERRSV)) + return -1; + + run_start_sub(); + + fprintf(stderr, "perf trace started with Perl script %s\n\n", script); + + return 0; +} + +/* + * Stop trace script + */ +static int perl_stop_script(void) +{ + dSP; /* access to Perl stack */ + PUSHMARK(SP); + + if (get_cv("main::trace_end", 0)) + call_pv("main::trace_end", G_DISCARD | G_NOARGS); + + perl_destruct(my_perl); + perl_free(my_perl); + + fprintf(stderr, "\nperf trace Perl script stopped\n"); + + return 0; +} + +static int perl_generate_script(const char *outfile) +{ + struct event *event = NULL; + struct format_field *f; + char fname[PATH_MAX]; + int not_first, count; + FILE *ofp; + + sprintf(fname, "%s.pl", outfile); + ofp = fopen(fname, "w"); + if (ofp == NULL) { + fprintf(stderr, "couldn't open %s\n", fname); + return -1; + } + + fprintf(ofp, "# perf trace event handlers, " + "generated by perf trace -g perl\n"); + + fprintf(ofp, "# Licensed under the terms of the GNU GPL" + " License version 2\n\n"); + + fprintf(ofp, "# The common_* event handler fields are the most useful " + "fields common to\n"); + + fprintf(ofp, "# all events. They don't necessarily correspond to " + "the 'common_*' fields\n"); + + fprintf(ofp, "# in the format files. Those fields not available as " + "handler params can\n"); + + fprintf(ofp, "# be retrieved using Perl functions of the form " + "common_*($context).\n"); + + fprintf(ofp, "# See Context.pm for the list of available " + "functions.\n\n"); + + fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/" + "Perf-Trace-Util/lib\";\n"); + + fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n"); + fprintf(ofp, "use Perf::Trace::Core;\n"); + fprintf(ofp, "use Perf::Trace::Context;\n"); + fprintf(ofp, "use Perf::Trace::Util;\n\n"); + + fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n"); + fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n"); + + while ((event = trace_find_next_event(event))) { + fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); + fprintf(ofp, "\tmy ("); + + fprintf(ofp, "$event_name, "); + fprintf(ofp, "$context, "); + fprintf(ofp, "$common_cpu, "); + fprintf(ofp, "$common_secs, "); + fprintf(ofp, "$common_nsecs,\n"); + fprintf(ofp, "\t $common_pid, "); + fprintf(ofp, "$common_comm,\n\t "); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + if (++count % 5 == 0) + fprintf(ofp, "\n\t "); + + fprintf(ofp, "$%s", f->name); + } + fprintf(ofp, ") = @_;\n\n"); + + fprintf(ofp, "\tprint_header($event_name, $common_cpu, " + "$common_secs, $common_nsecs,\n\t " + "$common_pid, $common_comm);\n\n"); + + fprintf(ofp, "\tprintf(\""); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + if (count && count % 4 == 0) { + fprintf(ofp, "\".\n\t \""); + } + count++; + + fprintf(ofp, "%s=", f->name); + if (f->flags & FIELD_IS_STRING || + f->flags & FIELD_IS_FLAG || + f->flags & FIELD_IS_SYMBOLIC) + fprintf(ofp, "%%s"); + else if (f->flags & FIELD_IS_SIGNED) + fprintf(ofp, "%%d"); + else + fprintf(ofp, "%%u"); + } + + fprintf(ofp, "\\n\",\n\t "); + + not_first = 0; + count = 0; + + for (f = event->format.fields; f; f = f->next) { + if (not_first++) + fprintf(ofp, ", "); + + if (++count % 5 == 0) + fprintf(ofp, "\n\t "); + + if (f->flags & FIELD_IS_FLAG) { + if ((count - 1) % 5 != 0) { + fprintf(ofp, "\n\t "); + count = 4; + } + fprintf(ofp, "flag_str(\""); + fprintf(ofp, "%s::%s\", ", event->system, + event->name); + fprintf(ofp, "\"%s\", $%s)", f->name, + f->name); + } else if (f->flags & FIELD_IS_SYMBOLIC) { + if ((count - 1) % 5 != 0) { + fprintf(ofp, "\n\t "); + count = 4; + } + fprintf(ofp, "symbol_str(\""); + fprintf(ofp, "%s::%s\", ", event->system, + event->name); + fprintf(ofp, "\"%s\", $%s)", f->name, + f->name); + } else + fprintf(ofp, "$%s", f->name); + } + + fprintf(ofp, ");\n"); + fprintf(ofp, "}\n\n"); + } + + fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, " + "$common_cpu, $common_secs, $common_nsecs,\n\t " + "$common_pid, $common_comm) = @_;\n\n"); + + fprintf(ofp, "\tprint_header($event_name, $common_cpu, " + "$common_secs, $common_nsecs,\n\t $common_pid, " + "$common_comm);\n}\n\n"); + + fprintf(ofp, "sub print_header\n{\n" + "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n" + "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t " + "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}"); + + fclose(ofp); + + fprintf(stderr, "generated Perl script: %s\n", fname); + + return 0; +} + +struct scripting_ops perl_scripting_ops = { + .name = "Perl", + .start_script = perl_start_script, + .stop_script = perl_stop_script, + .process_event = perl_process_event, + .generate_script = perl_generate_script, +}; + +#ifdef NO_LIBPERL +void setup_perl_scripting(void) +{ + fprintf(stderr, "Perl scripting not supported." + " Install libperl-dev[el] and rebuild perf to get it.\n"); +} +#else +void setup_perl_scripting(void) +{ + int err; + err = script_spec_register("Perl", &perl_scripting_ops); + if (err) + die("error registering Perl script extension"); + + err = script_spec_register("pl", &perl_scripting_ops); + if (err) + die("error registering pl script extension"); + + scripting_context = malloc(sizeof(struct scripting_context)); +} +#endif diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h new file mode 100644 index 0000000..6c94fa9 --- /dev/null +++ b/tools/perf/util/trace-event-perl.h @@ -0,0 +1,42 @@ +#ifndef __PERF_TRACE_EVENT_PERL_H +#define __PERF_TRACE_EVENT_PERL_H +#ifdef NO_LIBPERL +typedef int INTERP; +#define dSP +#define ENTER +#define SAVETMPS +#define PUTBACK +#define SPAGAIN +#define FREETMPS +#define LEAVE +#define SP +#define ERRSV +#define G_SCALAR (0) +#define G_DISCARD (0) +#define G_NOARGS (0) +#define PUSHMARK(a) +#define SvTRUE(a) (0) +#define XPUSHs(s) +#define sv_2mortal(a) +#define newSVpv(a,b) +#define newSVuv(a) +#define newSViv(a) +#define get_cv(a,b) (0) +#define call_pv(a,b) (0) +#define perl_alloc() (0) +#define perl_construct(a) (0) +#define perl_parse(a,b,c,d,e) (0) +#define perl_run(a) (0) +#define perl_destruct(a) (0) +#define perl_free(a) (0) +#else +#include +#include +typedef PerlInterpreter * INTERP; +#endif + +struct scripting_context { + void *event_data; +}; + +#endif /* __PERF_TRACE_EVENT_PERL_H */ diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index aeb9157..b1e58d3 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -245,10 +245,14 @@ extern int latency_format; int parse_header_page(char *buf, unsigned long size); int trace_parse_common_type(void *data); +int trace_parse_common_pid(void *data); struct event *trace_find_event(int id); +struct event *trace_find_next_event(struct event *event); +unsigned long long read_size(void *ptr, int size); unsigned long long raw_field_value(struct event *event, const char *name, void *data); void *raw_field_ptr(struct event *event, const char *name, void *data); +unsigned long long eval_flag(const char *flag); int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); @@ -272,4 +276,7 @@ struct scripting_ops { int script_spec_register(const char *spec, struct scripting_ops *ops); +extern struct scripting_ops perl_scripting_ops; +void setup_perl_scripting(void); + #endif /* __PERF_TRACE_EVENTS_H */ -- cgit v0.10.2 From bcefe12eff5dca6fdfa94ed85e5bee66380d5cd9 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:15:49 -0600 Subject: perf trace: Add perf trace scripting support modules for Perl Add Perf-Trace-Util Perl module and some scripts that use it. Core.pm contains Perl code to define and access flag and symbolic fields. Util.pm contains general-purpose utility functions. Also adds some makefile bits to install them in libexec/perf-core/scripts/perl (or wherever perfexec_instdir points). Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259133352-23685-5-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 19e37cd..efbc0e8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -980,6 +980,13 @@ export perfexec_instdir install: all $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace' + $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl' + $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin' + $(INSTALL) scripts/perl/Perf-Trace-Util/Makefile.PL -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util' + $(INSTALL) scripts/perl/Perf-Trace-Util/README -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util' ifdef BUILT_INS $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL new file mode 100644 index 0000000..b0de02e --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL @@ -0,0 +1,12 @@ +use 5.010000; +use ExtUtils::MakeMaker; +# See lib/ExtUtils/MakeMaker.pm for details of how to influence +# the contents of the Makefile that is written. +WriteMakefile( + NAME => 'Perf::Trace::Util', + VERSION_FROM => 'lib/Perf/Trace/Util.pm', # finds $VERSION + PREREQ_PM => {}, # e.g., Module::Name => 1.1 + ($] >= 5.005 ? ## Add these new keywords supported since 5.005 + (ABSTRACT_FROM => 'lib/Perf/Trace/Util.pm', # retrieve abstract from module + AUTHOR => 'Tom Zanussi ') : ()), +); diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README new file mode 100644 index 0000000..0a58378 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/README @@ -0,0 +1,35 @@ +Perf-Trace-Util version 0.01 +============================ + +This module contains utility functions for use with perf trace. + +INSTALLATION + +Building perf with perf trace Perl scripting should install this +module in the right place. + +You should make sure libperl is installed first e.g. apt-get install +libperl-dev. + +DEPENDENCIES + +This module requires these other modules and libraries: + + blah blah blah + +COPYRIGHT AND LICENCE + +Put the correct copyright and licence information here. + +Copyright (C) 2009 by Tom Zanussi + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.0 or, +at your option, any later version of Perl 5 you may have available. + +Alternatively, this software may be distributed under the terms of the +GNU General Public License ("GPL") version 2 as published by the Free +Software Foundation. + + + diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm new file mode 100644 index 0000000..fd250fb --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm @@ -0,0 +1,157 @@ +package Perf::Trace::Core; + +use 5.010000; +use strict; +use warnings; + +require Exporter; + +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = ( 'all' => [ qw( +) ] ); + +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +our @EXPORT = qw( +define_flag_field define_flag_value flag_str dump_flag_fields +define_symbolic_field define_symbolic_value symbol_str dump_symbolic_fields +); + +our $VERSION = '0.01'; + +my %flag_fields; +my %symbolic_fields; + +sub flag_str +{ + my ($event_name, $field_name, $value) = @_; + + my $string; + + if ($flag_fields{$event_name}{$field_name}) { + my $print_delim = 0; + foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event_name}{$field_name}{"values"}}) { + if (!$value && !$idx) { + $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}"; + last; + } + if ($idx && ($value & $idx) == $idx) { + if ($print_delim && $flag_fields{$event_name}{$field_name}{'delim'}) { + $string .= " $flag_fields{$event_name}{$field_name}{'delim'} "; + } + $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}"; + $print_delim = 1; + $value &= ~$idx; + } + } + } + + return $string; +} + +sub define_flag_field +{ + my ($event_name, $field_name, $delim) = @_; + + $flag_fields{$event_name}{$field_name}{"delim"} = $delim; +} + +sub define_flag_value +{ + my ($event_name, $field_name, $value, $field_str) = @_; + + $flag_fields{$event_name}{$field_name}{"values"}{$value} = $field_str; +} + +sub dump_flag_fields +{ + for my $event (keys %flag_fields) { + print "event $event:\n"; + for my $field (keys %{$flag_fields{$event}}) { + print " field: $field:\n"; + print " delim: $flag_fields{$event}{$field}{'delim'}\n"; + foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event}{$field}{"values"}}) { + print " value $idx: $flag_fields{$event}{$field}{'values'}{$idx}\n"; + } + } + } +} + +sub symbol_str +{ + my ($event_name, $field_name, $value) = @_; + + if ($symbolic_fields{$event_name}{$field_name}) { + foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event_name}{$field_name}{"values"}}) { + if (!$value && !$idx) { + return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}"; + last; + } + if ($value == $idx) { + return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}"; + } + } + } + + return undef; +} + +sub define_symbolic_field +{ + my ($event_name, $field_name) = @_; + + # nothing to do, really +} + +sub define_symbolic_value +{ + my ($event_name, $field_name, $value, $field_str) = @_; + + $symbolic_fields{$event_name}{$field_name}{"values"}{$value} = $field_str; +} + +sub dump_symbolic_fields +{ + for my $event (keys %symbolic_fields) { + print "event $event:\n"; + for my $field (keys %{$symbolic_fields{$event}}) { + print " field: $field:\n"; + foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event}{$field}{"values"}}) { + print " value $idx: $symbolic_fields{$event}{$field}{'values'}{$idx}\n"; + } + } + } +} + +1; +__END__ +=head1 NAME + +Perf::Trace::Core - Perl extension for perf trace + +=head1 SYNOPSIS + + use Perf::Trace::Core + +=head1 SEE ALSO + +Perf (trace) documentation + +=head1 AUTHOR + +Tom Zanussi, Etzanussi@gmail.com + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2009 by Tom Zanussi + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.0 or, +at your option, any later version of Perl 5 you may have available. + +Alternatively, this software may be distributed under the terms of the +GNU General Public License ("GPL") version 2 as published by the Free +Software Foundation. + +=cut diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm new file mode 100644 index 0000000..052f132 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm @@ -0,0 +1,88 @@ +package Perf::Trace::Util; + +use 5.010000; +use strict; +use warnings; + +require Exporter; + +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = ( 'all' => [ qw( +) ] ); + +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +our @EXPORT = qw( +avg nsecs nsecs_secs nsecs_nsecs nsecs_usecs print_nsecs +); + +our $VERSION = '0.01'; + +sub avg +{ + my ($total, $n) = @_; + + return $total / $n; +} + +my $NSECS_PER_SEC = 1000000000; + +sub nsecs +{ + my ($secs, $nsecs) = @_; + + return $secs * $NSECS_PER_SEC + $nsecs; +} + +sub nsecs_secs { + my ($nsecs) = @_; + + return $nsecs / $NSECS_PER_SEC; +} + +sub nsecs_nsecs { + my ($nsecs) = @_; + + return $nsecs - nsecs_secs($nsecs); +} + +sub nsecs_str { + my ($nsecs) = @_; + + my $str = sprintf("%5u.%09u", nsecs_secs($nsecs), nsecs_nsecs($nsecs)); + + return $str; +} + +1; +__END__ +=head1 NAME + +Perf::Trace::Util - Perl extension for perf trace + +=head1 SYNOPSIS + + use Perf::Trace::Util; + +=head1 SEE ALSO + +Perf (trace) documentation + +=head1 AUTHOR + +Tom Zanussi, Etzanussi@gmail.com + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2009 by Tom Zanussi + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.0 or, +at your option, any later version of Perl 5 you may have available. + +Alternatively, this software may be distributed under the terms of the +GNU General Public License ("GPL") version 2 as published by the Free +Software Foundation. + +=cut diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl new file mode 100644 index 0000000..61f9156 --- /dev/null +++ b/tools/perf/scripts/perl/rw-by-file.pl @@ -0,0 +1,105 @@ +#!/usr/bin/perl -w +# (c) 2009, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 + +# Display r/w activity for files read/written to for a given program + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the status files. Those fields not available as handler params can +# be retrieved via script functions of the form get_common_*(). + +use 5.010000; +use strict; +use warnings; + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Util; + +# change this to the comm of the program you're interested in +my $for_comm = "perf"; + +my %reads; +my %writes; + +sub syscalls::sys_enter_read +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + + if ($common_comm eq $for_comm) { + $reads{$fd}{bytes_requested} += $count; + $reads{$fd}{total_reads}++; + } +} + +sub syscalls::sys_enter_write +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + + if ($common_comm eq $for_comm) { + $writes{$fd}{bytes_written} += $count; + $writes{$fd}{total_writes}++; + } +} + +sub trace_end +{ + printf("file read counts for $for_comm:\n\n"); + + printf("%6s %10s %10s\n", "fd", "# reads", "bytes_requested"); + printf("%6s %10s %10s\n", "------", "----------", "-----------"); + + foreach my $fd (sort {$reads{$b}{bytes_requested} <=> + $reads{$a}{bytes_requested}} keys %reads) { + my $total_reads = $reads{$fd}{total_reads}; + my $bytes_requested = $reads{$fd}{bytes_requested}; + printf("%6u %10u %10u\n", $fd, $total_reads, $bytes_requested); + } + + printf("\nfile write counts for $for_comm:\n\n"); + + printf("%6s %10s %10s\n", "fd", "# writes", "bytes_written"); + printf("%6s %10s %10s\n", "------", "----------", "-----------"); + + foreach my $fd (sort {$writes{$b}{bytes_written} <=> + $writes{$a}{bytes_written}} keys %writes) { + my $total_writes = $writes{$fd}{total_writes}; + my $bytes_written = $writes{$fd}{bytes_written}; + printf("%6u %10u %10u\n", $fd, $total_writes, $bytes_written); + } + + print_unhandled(); +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} + + diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl new file mode 100644 index 0000000..da601fa --- /dev/null +++ b/tools/perf/scripts/perl/rw-by-pid.pl @@ -0,0 +1,170 @@ +#!/usr/bin/perl -w +# (c) 2009, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 + +# Display r/w activity for all processes + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the status files. Those fields not available as handler params can +# be retrieved via script functions of the form get_common_*(). + +use 5.010000; +use strict; +use warnings; + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Util; + +my %reads; +my %writes; + +sub syscalls::sys_exit_read +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $nr, $ret) = @_; + + if ($ret > 0) { + $reads{$common_pid}{bytes_read} += $ret; + } else { + if (!defined ($reads{$common_pid}{bytes_read})) { + $reads{$common_pid}{bytes_read} = 0; + } + $reads{$common_pid}{errors}{$ret}++; + } +} + +sub syscalls::sys_enter_read +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $nr, $fd, $buf, $count) = @_; + + $reads{$common_pid}{bytes_requested} += $count; + $reads{$common_pid}{total_reads}++; + $reads{$common_pid}{comm} = $common_comm; +} + +sub syscalls::sys_exit_write +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $nr, $ret) = @_; + + if ($ret <= 0) { + $writes{$common_pid}{errors}{$ret}++; + } +} + +sub syscalls::sys_enter_write +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $nr, $fd, $buf, $count) = @_; + + $writes{$common_pid}{bytes_written} += $count; + $writes{$common_pid}{total_writes}++; + $writes{$common_pid}{comm} = $common_comm; +} + +sub trace_end +{ + printf("read counts by pid:\n\n"); + + printf("%6s %20s %10s %10s %10s\n", "pid", "comm", + "# reads", "bytes_requested", "bytes_read"); + printf("%6s %-20s %10s %10s %10s\n", "------", "--------------------", + "-----------", "----------", "----------"); + + foreach my $pid (sort {$reads{$b}{bytes_read} <=> + $reads{$a}{bytes_read}} keys %reads) { + my $comm = $reads{$pid}{comm}; + my $total_reads = $reads{$pid}{total_reads}; + my $bytes_requested = $reads{$pid}{bytes_requested}; + my $bytes_read = $reads{$pid}{bytes_read}; + + printf("%6s %-20s %10s %10s %10s\n", $pid, $comm, + $total_reads, $bytes_requested, $bytes_read); + } + + printf("\nfailed reads by pid:\n\n"); + + printf("%6s %20s %6s %10s\n", "pid", "comm", "error #", "# errors"); + printf("%6s %20s %6s %10s\n", "------", "--------------------", + "------", "----------"); + + foreach my $pid (keys %reads) { + my $comm = $reads{$pid}{comm}; + foreach my $err (sort {$reads{$b}{comm} cmp $reads{$a}{comm}} + keys %{$reads{$pid}{errors}}) { + my $errors = $reads{$pid}{errors}{$err}; + + printf("%6d %-20s %6d %10s\n", $pid, $comm, $err, $errors); + } + } + + printf("\nwrite counts by pid:\n\n"); + + printf("%6s %20s %10s %10s\n", "pid", "comm", + "# writes", "bytes_written"); + printf("%6s %-20s %10s %10s\n", "------", "--------------------", + "-----------", "----------"); + + foreach my $pid (sort {$writes{$b}{bytes_written} <=> + $writes{$a}{bytes_written}} keys %writes) { + my $comm = $writes{$pid}{comm}; + my $total_writes = $writes{$pid}{total_writes}; + my $bytes_written = $writes{$pid}{bytes_written}; + + printf("%6s %-20s %10s %10s\n", $pid, $comm, + $total_writes, $bytes_written); + } + + printf("\nfailed writes by pid:\n\n"); + + printf("%6s %20s %6s %10s\n", "pid", "comm", "error #", "# errors"); + printf("%6s %20s %6s %10s\n", "------", "--------------------", + "------", "----------"); + + foreach my $pid (keys %writes) { + my $comm = $writes{$pid}{comm}; + foreach my $err (sort {$writes{$b}{comm} cmp $writes{$a}{comm}} + keys %{$writes{$pid}{errors}}) { + my $errors = $writes{$pid}{errors}{$err}; + + printf("%6d %-20s %6d %10s\n", $pid, $comm, $err, $errors); + } + } + + print_unhandled(); +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl new file mode 100644 index 0000000..ed58ef2 --- /dev/null +++ b/tools/perf/scripts/perl/wakeup-latency.pl @@ -0,0 +1,103 @@ +#!/usr/bin/perl -w +# (c) 2009, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 + +# Display avg/min/max wakeup latency + +# The common_* event handler fields are the most useful fields common to +# all events. They don't necessarily correspond to the 'common_*' fields +# in the status files. Those fields not available as handler params can +# be retrieved via script functions of the form get_common_*(). + +use 5.010000; +use strict; +use warnings; + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Util; + +my %last_wakeup; + +my $max_wakeup_latency; +my $min_wakeup_latency; +my $total_wakeup_latency; +my $total_wakeups; + +sub sched::sched_switch +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid, + $next_prio) = @_; + + my $wakeup_ts = $last_wakeup{$common_cpu}{ts}; + if ($wakeup_ts) { + my $switch_ts = nsecs($common_secs, $common_nsecs); + my $wakeup_latency = $switch_ts - $wakeup_ts; + if ($wakeup_latency > $max_wakeup_latency) { + $max_wakeup_latency = $wakeup_latency; + } + if ($wakeup_latency < $min_wakeup_latency) { + $min_wakeup_latency = $wakeup_latency; + } + $total_wakeup_latency += $wakeup_latency; + $total_wakeups++; + } + $last_wakeup{$common_cpu}{ts} = 0; +} + +sub sched::sched_wakeup +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $comm, $pid, $prio, $success, $target_cpu) = @_; + + $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs); +} + +sub trace_begin +{ + $min_wakeup_latency = 1000000000; + $max_wakeup_latency = 0; +} + +sub trace_end +{ + printf("wakeup_latency stats:\n\n"); + print "total_wakeups: $total_wakeups\n"; + printf("avg_wakeup_latency (ns): %u\n", + avg($total_wakeup_latency, $total_wakeups)); + printf("min_wakeup_latency (ns): %u\n", $min_wakeup_latency); + printf("max_wakeup_latency (ns): %u\n", $max_wakeup_latency); + + print_unhandled(); +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl new file mode 100644 index 0000000..511302c --- /dev/null +++ b/tools/perf/scripts/perl/workqueue-stats.pl @@ -0,0 +1,129 @@ +#!/usr/bin/perl -w +# (c) 2009, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 + +# Displays workqueue stats +# +# Usage: +# +# perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e +# workqueue:workqueue_destruction -e workqueue:workqueue_execution +# -e workqueue:workqueue_insertion +# +# perf trace -p -s tools/perf/scripts/perl/workqueue-stats.pl + +use 5.010000; +use strict; +use warnings; + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Util; + +my @cpus; + +sub workqueue::workqueue_destruction +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $thread_comm, $thread_pid) = @_; + + $cpus[$common_cpu]{$thread_pid}{destroyed}++; + $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; +} + +sub workqueue::workqueue_creation +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $thread_comm, $thread_pid, $cpu) = @_; + + $cpus[$common_cpu]{$thread_pid}{created}++; + $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; +} + +sub workqueue::workqueue_execution +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $thread_comm, $thread_pid, $func) = @_; + + $cpus[$common_cpu]{$thread_pid}{executed}++; + $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; +} + +sub workqueue::workqueue_insertion +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $thread_comm, $thread_pid, $func) = @_; + + $cpus[$common_cpu]{$thread_pid}{inserted}++; + $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; +} + +sub trace_end +{ + print "workqueue work stats:\n\n"; + my $cpu = 0; + printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name"); + printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----"); + foreach my $pidhash (@cpus) { + while ((my $pid, my $wqhash) = each %$pidhash) { + my $ins = $$wqhash{'inserted'}; + my $exe = $$wqhash{'executed'}; + my $comm = $$wqhash{'comm'}; + if ($ins || $exe) { + printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm); + } + } + $cpu++; + } + + $cpu = 0; + print "\nworkqueue lifecycle stats:\n\n"; + printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name"); + printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----"); + foreach my $pidhash (@cpus) { + while ((my $pid, my $wqhash) = each %$pidhash) { + my $created = $$wqhash{'created'}; + my $destroyed = $$wqhash{'destroyed'}; + my $comm = $$wqhash{'comm'}; + if ($created || $destroyed) { + printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed, + $comm); + } + } + $cpu++; + } + + print_unhandled(); +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} -- cgit v0.10.2 From d1b93772be78486397693fc39d3ddea3fda90105 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:15:50 -0600 Subject: perf trace: Add interface to access perf data from Perl handlers The Perl scripting support for perf trace allows most of a trace event's data to be accessed directly as handler arguments, but not all of it e.g. the less common fields aren't passed in. To give scripts access to the other fields and/or any other data or metadata in the main perf executable that might be useful, a way to access the C data in perf from Perl is needed; this patch uses the Perl XS facility to do it for the common_xxx event fields not passed to handler functions. Context.pm exports three functions to Perl scripts that access fields for the current event by calling back into perf: common_pc(), common_flags() and common_lock_depth(). Support for common_flags() field values was added to Core.pm and a script used to sanity check these and other basic scripting features, check-perf-trace.pl, was also added. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259133352-23685-6-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index efbc0e8..8ad57b5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -497,6 +497,7 @@ ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; e BASIC_CFLAGS += -DNO_LIBPERL else ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) + LIB_OBJS += scripts/perl/Perf-Trace-Util/Context.o endif ifdef NO_DEMANGLE @@ -873,6 +874,9 @@ util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS $(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter $< +scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS + $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< + perf-%$X: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -1072,7 +1076,7 @@ distclean: clean # $(RM) configure clean: - $(RM) *.o */*.o $(LIB_FILE) + $(RM) *.o */*.o */*/*.o */*/*/*.o $(LIB_FILE) $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X $(RM) $(TEST_PROGRAMS) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c new file mode 100644 index 0000000..3ba3ffc --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c @@ -0,0 +1,134 @@ +/* + * This file was generated automatically by ExtUtils::ParseXS version 2.18_02 from the + * contents of Context.xs. Do not edit this file, edit Context.xs instead. + * + * ANY CHANGES MADE HERE WILL BE LOST! + * + */ + +#line 1 "Context.xs" +/* + * Context.xs. XS interfaces for perf trace. + * + * Copyright (C) 2009 Tom Zanussi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "../../../util/trace-event-perl.h" + +#ifndef PERL_UNUSED_VAR +# define PERL_UNUSED_VAR(var) if (0) var = var +#endif + +#line 41 "Context.c" + +XS(XS_Perf__Trace__Context_get_common_pc); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_get_common_pc) +{ +#ifdef dVAR + dVAR; dXSARGS; +#else + dXSARGS; +#endif + if (items != 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::get_common_pc", "context"); + PERL_UNUSED_VAR(cv); /* -W */ + { + struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); + int RETVAL; + dXSTARG; + + RETVAL = get_common_pc(context); + XSprePUSH; PUSHi((IV)RETVAL); + } + XSRETURN(1); +} + + +XS(XS_Perf__Trace__Context_get_common_flags); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_get_common_flags) +{ +#ifdef dVAR + dVAR; dXSARGS; +#else + dXSARGS; +#endif + if (items != 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::get_common_flags", "context"); + PERL_UNUSED_VAR(cv); /* -W */ + { + struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); + int RETVAL; + dXSTARG; + + RETVAL = get_common_flags(context); + XSprePUSH; PUSHi((IV)RETVAL); + } + XSRETURN(1); +} + + +XS(XS_Perf__Trace__Context_get_common_lock_depth); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_get_common_lock_depth) +{ +#ifdef dVAR + dVAR; dXSARGS; +#else + dXSARGS; +#endif + if (items != 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::get_common_lock_depth", "context"); + PERL_UNUSED_VAR(cv); /* -W */ + { + struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); + int RETVAL; + dXSTARG; + + RETVAL = get_common_lock_depth(context); + XSprePUSH; PUSHi((IV)RETVAL); + } + XSRETURN(1); +} + +#ifdef __cplusplus +extern "C" +#endif +XS(boot_Perf__Trace__Context); /* prototype to pass -Wmissing-prototypes */ +XS(boot_Perf__Trace__Context) +{ +#ifdef dVAR + dVAR; dXSARGS; +#else + dXSARGS; +#endif + const char* file = __FILE__; + + PERL_UNUSED_VAR(cv); /* -W */ + PERL_UNUSED_VAR(items); /* -W */ + XS_VERSION_BOOTCHECK ; + + newXSproto("Perf::Trace::Context::get_common_pc", XS_Perf__Trace__Context_get_common_pc, file, "$"); + newXSproto("Perf::Trace::Context::get_common_flags", XS_Perf__Trace__Context_get_common_flags, file, "$"); + newXSproto("Perf::Trace::Context::get_common_lock_depth", XS_Perf__Trace__Context_get_common_lock_depth, file, "$"); + if (PL_unitcheckav) + call_list(PL_scopestack_ix, PL_unitcheckav); + XSRETURN_YES; +} + diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs new file mode 100644 index 0000000..24facb3 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs @@ -0,0 +1,41 @@ +/* + * Context.xs. XS interfaces for perf trace. + * + * Copyright (C) 2009 Tom Zanussi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include "EXTERN.h" +#include "perl.h" +#include "XSUB.h" +#include "../../../util/trace-event-perl.h" + +MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context +PROTOTYPES: ENABLE + +int +get_common_pc(context) + struct scripting_context * context + +int +get_common_flags(context) + struct scripting_context * context + +int +get_common_lock_depth(context) + struct scripting_context * context + diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL index b0de02e..decdeb0 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL @@ -3,10 +3,15 @@ use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( - NAME => 'Perf::Trace::Util', - VERSION_FROM => 'lib/Perf/Trace/Util.pm', # finds $VERSION + NAME => 'Perf::Trace::Context', + VERSION_FROM => 'lib/Perf/Trace/Context.pm', # finds $VERSION PREREQ_PM => {}, # e.g., Module::Name => 1.1 ($] >= 5.005 ? ## Add these new keywords supported since 5.005 - (ABSTRACT_FROM => 'lib/Perf/Trace/Util.pm', # retrieve abstract from module + (ABSTRACT_FROM => 'lib/Perf/Trace/Context.pm', # retrieve abstract from module AUTHOR => 'Tom Zanussi ') : ()), + LIBS => [''], # e.g., '-lm' + DEFINE => '-I ../..', # e.g., '-DHAVE_SOMETHING' + INC => '-I.', # e.g., '-I. -I/usr/include/other' + # Un-comment this if you add C files to link with later: + OBJECT => 'Context.o', # link all the C files too ); diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README index 0a58378..adb99aa 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/README +++ b/tools/perf/scripts/perl/Perf-Trace-Util/README @@ -3,6 +3,34 @@ Perf-Trace-Util version 0.01 This module contains utility functions for use with perf trace. +Core.pm and Util.pm are pure Perl modules; Core.pm contains routines +that the core perf support for Perl calls on and should always be +'used', while Util.pm contains useful but optional utility functions +that scripts may want to use. Context.pm contains the Perl->C +interface that allows scripts to access data in the embedding perf +executable; scripts wishing to do that should 'use Context.pm'. + +The Perl->C perf interface is completely driven by Context.xs. If you +want to add new Perl functions that end up accessing C data in the +perf executable, you add desciptions of the new functions here. +scripting_context is a pointer to the perf data in the perf executable +that you want to access - it's passed as the second parameter, +$context, to all handler functions. + +After you do that: + + perl Makefile.PL # to create a Makefile for the next step + make # to create Context.c + + edit Context.c to add const to the char* file = __FILE__ line in + XS(boot_Perf__Trace__Context) to silence a warning/error. + + You can delete the Makefile, object files and anything else that was + generated e.g. blib and shared library, etc, except for of course + Context.c + + You should then be able to run the normal perf make as usual. + INSTALLATION Building perf with perf trace Perl scripting should install this @@ -15,12 +43,10 @@ DEPENDENCIES This module requires these other modules and libraries: - blah blah blah + None COPYRIGHT AND LICENCE -Put the correct copyright and licence information here. - Copyright (C) 2009 by Tom Zanussi This library is free software; you can redistribute it and/or modify @@ -31,5 +57,3 @@ Alternatively, this software may be distributed under the terms of the GNU General Public License ("GPL") version 2 as published by the Free Software Foundation. - - diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm new file mode 100644 index 0000000..6c7f3659 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm @@ -0,0 +1,55 @@ +package Perf::Trace::Context; + +use 5.010000; +use strict; +use warnings; + +require Exporter; + +our @ISA = qw(Exporter); + +our %EXPORT_TAGS = ( 'all' => [ qw( +) ] ); + +our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); + +our @EXPORT = qw( + common_pc common_flags common_lock_depth +); + +our $VERSION = '0.01'; + +require XSLoader; +XSLoader::load('Perf::Trace::Context', $VERSION); + +1; +__END__ +=head1 NAME + +Perf::Trace::Context - Perl extension for accessing functions in perf. + +=head1 SYNOPSIS + + use Perf::Trace::Context; + +=head1 SEE ALSO + +Perf (trace) documentation + +=head1 AUTHOR + +Tom Zanussi, Etzanussi@gmail.com + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2009 by Tom Zanussi + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.10.0 or, +at your option, any later version of Perl 5 you may have available. + +Alternatively, this software may be distributed under the terms of the +GNU General Public License ("GPL") version 2 as published by the Free +Software Foundation. + +=cut diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm index fd250fb..9df376a 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm +++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm @@ -16,10 +16,45 @@ our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); our @EXPORT = qw( define_flag_field define_flag_value flag_str dump_flag_fields define_symbolic_field define_symbolic_value symbol_str dump_symbolic_fields +trace_flag_str ); our $VERSION = '0.01'; +my %trace_flags = (0x00 => "NONE", + 0x01 => "IRQS_OFF", + 0x02 => "IRQS_NOSUPPORT", + 0x04 => "NEED_RESCHED", + 0x08 => "HARDIRQ", + 0x10 => "SOFTIRQ"); + +sub trace_flag_str +{ + my ($value) = @_; + + my $string; + + my $print_delim = 0; + + foreach my $idx (sort {$a <=> $b} keys %trace_flags) { + if (!$value && !$idx) { + $string .= "NONE"; + last; + } + + if ($idx && ($value & $idx) == $idx) { + if ($print_delim) { + $string .= " | "; + } + $string .= "$trace_flags{$idx}"; + $print_delim = 1; + $value &= ~$idx; + } + } + + return $string; +} + my %flag_fields; my %symbolic_fields; diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/typemap b/tools/perf/scripts/perl/Perf-Trace-Util/typemap new file mode 100644 index 0000000..8408368 --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/typemap @@ -0,0 +1 @@ +struct scripting_context * T_PTR diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl new file mode 100644 index 0000000..4e7dc0a --- /dev/null +++ b/tools/perf/scripts/perl/check-perf-trace.pl @@ -0,0 +1,106 @@ +# perf trace event handlers, generated by perf trace -g perl +# (c) 2009, Tom Zanussi +# Licensed under the terms of the GNU GPL License version 2 + +# This script tests basic functionality such as flag and symbol +# strings, common_xxx() calls back into perf, begin, end, unhandled +# events, etc. Basically, if this script runs successfully and +# displays expected results, perl scripting support should be ok. + +use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; +use lib "./Perf-Trace-Util/lib"; +use Perf::Trace::Core; +use Perf::Trace::Context; +use Perf::Trace::Util; + +sub trace_begin +{ + print "trace_begin\n"; +} + +sub trace_end +{ + print "trace_end\n"; + + print_unhandled(); +} + +sub irq::softirq_entry +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $vec) = @_; + + print_header($event_name, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm); + + print_uncommon($context); + + printf("vec=%s\n", + symbol_str("irq::softirq_entry", "vec", $vec)); +} + +sub kmem::kmalloc +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm, + $call_site, $ptr, $bytes_req, $bytes_alloc, + $gfp_flags) = @_; + + print_header($event_name, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm); + + print_uncommon($context); + + printf("call_site=%p, ptr=%p, bytes_req=%u, bytes_alloc=%u, ". + "gfp_flags=%s\n", + $call_site, $ptr, $bytes_req, $bytes_alloc, + + flag_str("kmem::kmalloc", "gfp_flags", $gfp_flags)); +} + +# print trace fields not included in handler args +sub print_uncommon +{ + my ($context) = @_; + + printf("common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, ", + common_pc($context), trace_flag_str(common_flags($context)), + common_lock_depth($context)); + +} + +my %unhandled; + +sub print_unhandled +{ + if ((scalar keys %unhandled) == 0) { + return; + } + + print "\nunhandled events:\n\n"; + + printf("%-40s %10s\n", "event", "count"); + printf("%-40s %10s\n", "----------------------------------------", + "-----------"); + + foreach my $event_name (keys %unhandled) { + printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); + } +} + +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, + $common_pid, $common_comm) = @_; + + $unhandled{$event_name}++; +} + +sub print_header +{ + my ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_; + + printf("%-20s %5u %05u.%09u %8u %-20s ", + $event_name, $cpu, $secs, $nsecs, $pid, $comm); +} diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 1f16495..0302405 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -1982,7 +1982,7 @@ int trace_parse_common_pid(void *data) "common_pid"); } -static int parse_common_pc(void *data) +int parse_common_pc(void *data) { static int pc_offset; static int pc_size; @@ -1991,7 +1991,7 @@ static int parse_common_pc(void *data) "common_preempt_count"); } -static int parse_common_flags(void *data) +int parse_common_flags(void *data) { static int flags_offset; static int flags_size; @@ -2000,7 +2000,7 @@ static int parse_common_flags(void *data) "common_flags"); } -static int parse_common_lock_depth(void *data) +int parse_common_lock_depth(void *data) { static int ld_offset; static int ld_size; diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c index c56b08d..d179ade 100644 --- a/tools/perf/util/trace-event-perl.c +++ b/tools/perf/util/trace-event-perl.c @@ -30,6 +30,21 @@ #include "trace-event.h" #include "trace-event-perl.h" +void xs_init(pTHX); + +void boot_Perf__Trace__Context(pTHX_ CV *cv); +void boot_DynaLoader(pTHX_ CV *cv); + +void xs_init(pTHX) +{ + const char *file = __FILE__; + dXSUB_SYS; + + newXS("Perf::Trace::Context::bootstrap", boot_Perf__Trace__Context, + file); + newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file); +} + INTERP my_perl; #define FTRACE_MAX_EVENT \ @@ -227,6 +242,33 @@ static inline struct event *find_cache_event(int type) return event; } +int get_common_pc(struct scripting_context *context) +{ + int pc; + + pc = parse_common_pc(context->event_data); + + return pc; +} + +int get_common_flags(struct scripting_context *context) +{ + int flags; + + flags = parse_common_flags(context->event_data); + + return flags; +} + +int get_common_lock_depth(struct scripting_context *context) +{ + int lock_depth; + + lock_depth = parse_common_lock_depth(context->event_data); + + return lock_depth; +} + static void perl_process_event(int cpu, void *data, int size __attribute((unused)), unsigned long long nsecs, char *comm) @@ -290,6 +332,7 @@ static void perl_process_event(int cpu, void *data, } PUTBACK; + if (get_cv(handler, 0)) call_pv(handler, G_SCALAR); else if (get_cv("main::trace_unhandled", 0)) { @@ -328,7 +371,8 @@ static int perl_start_script(const char *script) my_perl = perl_alloc(); perl_construct(my_perl); - if (perl_parse(my_perl, NULL, 2, (char **)command_line, (char **)NULL)) + if (perl_parse(my_perl, xs_init, 2, (char **)command_line, + (char **)NULL)) return -1; perl_run(my_perl); diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h index 6c94fa9..666a864 100644 --- a/tools/perf/util/trace-event-perl.h +++ b/tools/perf/util/trace-event-perl.h @@ -29,6 +29,11 @@ typedef int INTERP; #define perl_run(a) (0) #define perl_destruct(a) (0) #define perl_free(a) (0) +#define pTHX void +#define CV void +#define dXSUB_SYS +#define pTHX_ +static inline void newXS(const char *a, void *b, const char *c) {} #else #include #include @@ -39,4 +44,8 @@ struct scripting_context { void *event_data; }; +int get_common_pc(struct scripting_context *context); +int get_common_flags(struct scripting_context *context); +int get_common_lock_depth(struct scripting_context *context); + #endif /* __PERF_TRACE_EVENT_PERL_H */ diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index b1e58d3..81698d5 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -246,6 +246,9 @@ extern int latency_format; int parse_header_page(char *buf, unsigned long size); int trace_parse_common_type(void *data); int trace_parse_common_pid(void *data); +int parse_common_pc(void *data); +int parse_common_flags(void *data); +int parse_common_lock_depth(void *data); struct event *trace_find_event(int id); struct event *trace_find_next_event(struct event *event); unsigned long long read_size(void *ptr, int size); -- cgit v0.10.2 From 89fbf0b8a021cbf60abeacfb6b538e97c83afada Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:15:51 -0600 Subject: perf trace: Add Documentation for perf trace Perl support Adds perf-trace-perl Documentation and a link to it from the perf-trace page. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259133352-23685-7-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Documentation/perf-trace-perl.txt b/tools/perf/Documentation/perf-trace-perl.txt new file mode 100644 index 0000000..c5f55f4 --- /dev/null +++ b/tools/perf/Documentation/perf-trace-perl.txt @@ -0,0 +1,219 @@ +perf-trace-perl(1) +================== + +NAME +---- +perf-trace-perl - Process trace data with a Perl script + +SYNOPSIS +-------- +[verse] +'perf trace' [-s [lang]:script[.ext] ] + +DESCRIPTION +----------- + +This perf trace option is used to process perf trace data using perf's +built-in Perl interpreter. It reads and processes the input file and +displays the results of the trace analysis implemented in the given +Perl script, if any. + +STARTER SCRIPTS +--------------- + +You can avoid reading the rest of this document by running 'perf trace +-g perl' in the same directory as an existing perf.data trace file. +That will generate a starter script containing a handler for each of +the event types in the trace file; it simply prints every available +field for each event in the trace file. + +You can also look at the existing scripts in +~/libexec/perf-core/scripts/perl for typical examples showing how to +do basic things like aggregate event data, print results, etc. Also, +the check-perf-trace.pl script, while not interesting for its results, +attempts to exercise all of the main scripting features. + +EVENT HANDLERS +-------------- + +When perf trace is invoked using a trace script, a user-defined +'handler function' is called for each event in the trace. If there's +no handler function defined for a given event type, the event is +ignored (or passed to a 'trace_handled' function, see below) and the +next event is processed. + +Most of the event's field values are passed as arguments to the +handler function; some of the less common ones aren't - those are +available as calls back into the perf executable (see below). + +As an example, the following perf record command can be used to record +all sched_wakeup events in the system: + + # perf record -c 1 -f -a -M -R -e sched:sched_wakeup + +Traces meant to be processed using a script should be recorded with +the above options: -c 1 says to sample every event, -a to enable +system-wide collection, -M to multiplex the output, and -R to collect +raw samples. + +The format file for the sched_wakep event defines the following fields +(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): + +---- + format: + field:unsigned short common_type; + field:unsigned char common_flags; + field:unsigned char common_preempt_count; + field:int common_pid; + field:int common_lock_depth; + + field:char comm[TASK_COMM_LEN]; + field:pid_t pid; + field:int prio; + field:int success; + field:int target_cpu; +---- + +The handler function for this event would be defined as: + +---- +sub sched::sched_wakeup +{ + my ($event_name, $context, $common_cpu, $common_secs, + $common_nsecs, $common_pid, $common_comm, + $comm, $pid, $prio, $success, $target_cpu) = @_; +} +---- + +The handler function takes the form subsystem::event_name. + +The $common_* arguments in the handler's argument list are the set of +arguments passed to all event handlers; some of the fields correspond +to the common_* fields in the format file, but some are synthesized, +and some of the common_* fields aren't common enough to to be passed +to every event as arguments but are available as library functions. + +Here's a brief description of each of the invariant event args: + + $event_name the name of the event as text + $context an opaque 'cookie' used in calls back into perf + $common_cpu the cpu the event occurred on + $common_secs the secs portion of the event timestamp + $common_nsecs the nsecs portion of the event timestamp + $common_pid the pid of the current task + $common_comm the name of the current process + +All of the remaining fields in the event's format file have +counterparts as handler function arguments of the same name, as can be +seen in the example above. + +The above provides the basics needed to directly access every field of +every event in a trace, which covers 90% of what you need to know to +write a useful trace script. The sections below cover the rest. + +SCRIPT LAYOUT +------------- + +Every perf trace Perl script should start by setting up a Perl module +search path and 'use'ing a few support modules (see module +descriptions below): + +---- + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; + use lib "./Perf-Trace-Util/lib"; + use Perf::Trace::Core; + use Perf::Trace::Context; + use Perf::Trace::Util; +---- + +The rest of the script can contain handler functions and support +functions in any order. + +Aside from the event handler functions discussed above, every script +can implement a set of optional functions: + +*trace_begin*, if defined, is called before any event is processed and +gives scripts a chance to do setup tasks: + +---- + sub trace_begin + { + } +---- + +*trace_end*, if defined, is called after all events have been + processed and gives scripts a chance to do end-of-script tasks, such + as display results: + +---- +sub trace_end +{ +} +---- + +*trace_unhandled*, if defined, is called after for any event that + doesn't have a handler explicitly defined for it. The standard set + of common arguments are passed into it: + +---- +sub trace_unhandled +{ + my ($event_name, $context, $common_cpu, $common_secs, + $common_nsecs, $common_pid, $common_comm) = @_; +} +---- + +The remaining sections provide descriptions of each of the available +built-in perf trace Perl modules and their associated functions. + +AVAILABLE MODULES AND FUNCTIONS +------------------------------- + +The following sections describe the functions and variables available +via the various Perf::Trace::* Perl modules. To use the functions and +variables from the given module, add the corresponding 'use +Perf::Trace::XXX' line to your perf trace script. + +Perf::Trace::Core Module +~~~~~~~~~~~~~~~~~~~~~~~~ + +These functions provide some essential functions to user scripts. + +The *flag_str* and *symbol_str* functions provide human-readable +strings for flag and symbolic fields. These correspond to the strings +and values parsed from the 'print fmt' fields of the event format +files: + + flag_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the flag field $field_name of event $event_name + symbol_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the symbolic field $field_name of event $event_name + +Perf::Trace::Context Module +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some of the 'common' fields in the event format file aren't all that +common, but need to be made accessible to user scripts nonetheless. + +Perf::Trace::Context defines a set of functions that can be used to +access this data in the context of the current event. Each of these +functions expects a $context variable, which is the same as the +$context variable passed into every event handler as the second +argument. + + common_pc($context) - returns common_preempt count for the current event + common_flags($context) - returns common_flags for the current event + common_lock_depth($context) - returns common_lock_depth for the current event + +Perf::Trace::Util Module +~~~~~~~~~~~~~~~~~~~~~~~~ + +Various utility functions for use with perf trace: + + nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair + nsecs_secs($nsecs) - returns whole secs portion given nsecs + nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs + nsecs_str($nsecs) - returns printable string in the form secs.nsecs + avg($total, $n) - returns average given a sum and a total number of values + +SEE ALSO +-------- +linkperf:perf-trace[1] diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 41ed753..07065ef 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -20,6 +20,15 @@ OPTIONS --dump-raw-trace=:: Display verbose dump of the trace data. +-s:: +--script=:: + Process trace data with the given script ([lang]:script[.ext]). + +-g:: +--gen-script=:: + Generate perf-trace.[ext] starter script for given language, + using current perf.data. + SEE ALSO -------- -linkperf:perf-record[1] +linkperf:perf-record[1], linkperf:perf-trace-perl[1] -- cgit v0.10.2 From 1ae4a971250c55e473ca53c78011fcf73809885d Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 25 Nov 2009 01:15:52 -0600 Subject: perf trace: Add a scripts/perl/bin for perf trace shell scripts To capture the relevant events for a given Perl script and to avoid having to continually remember and type in long command-lines, add a scripts/perl/bin directory containing two simple shell scripts for each Perl script, one for recording and one for processing/display. For example, to record perf data for the rw-by-pid.pl script, run scripts/perl/bin/rw-by-pid-record and to actually run the script and display the output run scripts/perl/bin/rw-by-pid-report. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259133352-23685-8-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record new file mode 100644 index 0000000..c7ec5de --- /dev/null +++ b/tools/perf/scripts/perl/bin/check-perf-trace-record @@ -0,0 +1,7 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry + + + + + diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-report b/tools/perf/scripts/perl/bin/check-perf-trace-report new file mode 100644 index 0000000..89948b0 --- /dev/null +++ b/tools/perf/scripts/perl/bin/check-perf-trace-report @@ -0,0 +1,5 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/check-perf-trace.pl + + + diff --git a/tools/perf/scripts/perl/bin/rw-by-file-record b/tools/perf/scripts/perl/bin/rw-by-file-record new file mode 100644 index 0000000..b25056e --- /dev/null +++ b/tools/perf/scripts/perl/bin/rw-by-file-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report new file mode 100644 index 0000000..f5dcf9c --- /dev/null +++ b/tools/perf/scripts/perl/bin/rw-by-file-report @@ -0,0 +1,5 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-file.pl + + + diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-record b/tools/perf/scripts/perl/bin/rw-by-pid-record new file mode 100644 index 0000000..8903979 --- /dev/null +++ b/tools/perf/scripts/perl/bin/rw-by-pid-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report new file mode 100644 index 0000000..cea16f7 --- /dev/null +++ b/tools/perf/scripts/perl/bin/rw-by-pid-report @@ -0,0 +1,5 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/rw-by-pid.pl + + + diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-record b/tools/perf/scripts/perl/bin/wakeup-latency-record new file mode 100644 index 0000000..6abedda --- /dev/null +++ b/tools/perf/scripts/perl/bin/wakeup-latency-record @@ -0,0 +1,6 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup + + + + diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report new file mode 100644 index 0000000..85769dc --- /dev/null +++ b/tools/perf/scripts/perl/bin/wakeup-latency-report @@ -0,0 +1,5 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/wakeup-latency.pl + + + diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record new file mode 100644 index 0000000..fce6637 --- /dev/null +++ b/tools/perf/scripts/perl/bin/workqueue-stats-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report new file mode 100644 index 0000000..aa68435 --- /dev/null +++ b/tools/perf/scripts/perl/bin/workqueue-stats-report @@ -0,0 +1,6 @@ +#!/bin/bash +perf trace -s ~/libexec/perf-core/scripts/perl/workqueue-stats.pl + + + + -- cgit v0.10.2 From cf72344d1ad7b33805ef8d65e758b267e6f4cb8d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 28 Nov 2009 10:11:00 +0100 Subject: perf scripting: Fix build Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ca8ebf1..abb914a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -5,6 +5,8 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/header.h" +#include "util/exec_cmd.h" +#include "util/trace-event.h" static char const *script_name; static char const *generate_script_lang; -- cgit v0.10.2 From f5560da549ea2e32dd41e36548c0e7dee3d4aabb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:38 +0900 Subject: pcmcia: Pass struct pcmcia_device to pcmcia_release_window() No logic changes, just pass struct pcmcia_device to pcmcia_release_window(). [linux@dominikbrodowski.net: update to 2.6.31] CC: netdev@vger.kernel.org CC: Jiri Kosina Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 24bffa4..91ca23d 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -168,15 +168,15 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, return 0; exit3: - pcmcia_release_window(ipw->handle_attr_memory); + pcmcia_release_window(p_dev, ipw->handle_attr_memory); exit2: if (ipw->common_memory) { release_mem_region(ipw->request_common_memory.Base, ipw->request_common_memory.Size); iounmap(ipw->common_memory); - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(p_dev, ipw->handle_common_memory); } else - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(p_dev, ipw->handle_common_memory); exit1: release_resource(io_resource); pcmcia_disable_device(p_dev); @@ -260,13 +260,13 @@ exit: release_mem_region(ipw->request_attr_memory.Base, ipw->request_attr_memory.Size); iounmap(ipw->attr_memory); - pcmcia_release_window(ipw->handle_attr_memory); + pcmcia_release_window(link, ipw->handle_attr_memory); } if (ipw->common_memory) { release_mem_region(ipw->request_common_memory.Base, ipw->request_common_memory.Size); iounmap(ipw->common_memory); - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(link, ipw->handle_common_memory); } pcmcia_disable_device(link); return -1; @@ -287,9 +287,9 @@ static void release_ipwireless(struct ipw_dev *ipw) iounmap(ipw->attr_memory); } if (ipw->common_memory) - pcmcia_release_window(ipw->handle_common_memory); + pcmcia_release_window(ipw->link, ipw->handle_common_memory); if (ipw->attr_memory) - pcmcia_release_window(ipw->handle_attr_memory); + pcmcia_release_window(ipw->link, ipw->handle_attr_memory); /* Break the link with Card Services */ pcmcia_disable_device(ipw->link); diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index b698dba..16d9985 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -344,7 +344,7 @@ static void pcmciamtd_release(struct pcmcia_device *link) iounmap(dev->win_base); dev->win_base = NULL; } - pcmcia_release_window(link->win); + pcmcia_release_window(link, link->win); } pcmcia_disable_device(link); } diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index fdec5c3..61726c8 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -599,7 +599,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) } iounmap(base); - j = pcmcia_release_window(link->win); + j = pcmcia_release_window(link, link->win); return (i != 0x200) ? 0 : -1; } /* fmvj18x_get_hwinfo */ @@ -666,7 +666,7 @@ static void fmvj18x_release(struct pcmcia_device *link) tmp = lp->base; lp->base = NULL; /* set NULL before iounmap */ iounmap(tmp); - j = pcmcia_release_window(link->win); + j = pcmcia_release_window(link, link->win); } pcmcia_disable_device(link); diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 0d914f3..7a985ca 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -328,7 +328,7 @@ static void ibmtr_release(struct pcmcia_device *link) if (link->win) { struct tok_info *ti = netdev_priv(dev); iounmap(ti->mmio); - pcmcia_release_window(info->sram_win_handle); + pcmcia_release_window(link, info->sram_win_handle); } pcmcia_disable_device(link); } diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 80ab9de..4156a6c 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -337,7 +337,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) } iounmap(virt); - j = pcmcia_release_window(link->win); + j = pcmcia_release_window(link, link->win); return (i < NR_INFO) ? hw_info+i : NULL; } /* get_hwinfo */ @@ -1513,7 +1513,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, pcnet_reset_8390(dev); if (i != (TX_PAGES<<8)) { iounmap(info->base); - pcmcia_release_window(link->win); + pcmcia_release_window(link, link->win); info->base = NULL; link->win = NULL; goto failed; } diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 28db791..ed32875 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -820,10 +820,10 @@ static void ray_release(struct pcmcia_device *link) iounmap(local->rmem); iounmap(local->amem); /* Do bother checking to see if these succeed or not */ - i = pcmcia_release_window(local->amem_handle); + i = pcmcia_release_window(link, local->amem_handle); if (i != 0) dev_dbg(&link->dev, "ReleaseWindow(local->amem) ret = %x\n", i); - i = pcmcia_release_window(local->rmem_handle); + i = pcmcia_release_window(link, local->rmem_handle); if (i != 0) dev_dbg(&link->dev, "ReleaseWindow(local->rmem) ret = %x\n", i); pcmcia_disable_device(link); diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index cda48ea..820a6e5 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -442,9 +442,10 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) } /* pcmcia_release_irq */ -int pcmcia_release_window(window_handle_t win) +int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) { struct pcmcia_socket *s; + window_handle_t win = wh; if ((win == NULL) || (win->magic != WINDOW_MAGIC)) return -EINVAL; @@ -891,7 +892,7 @@ void pcmcia_disable_device(struct pcmcia_device *p_dev) { pcmcia_release_io(p_dev, &p_dev->io); pcmcia_release_irq(p_dev, &p_dev->irq); if (p_dev->win) - pcmcia_release_window(p_dev->win); + pcmcia_release_window(p_dev, p_dev->win); } EXPORT_SYMBOL(pcmcia_disable_device); diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index d82392d..40b098d 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -201,7 +201,7 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh); -int pcmcia_release_window(window_handle_t win); +int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); int pcmcia_map_mem_page(window_handle_t win, memreq_t *req); -- cgit v0.10.2 From 868575d1e87ff2091800aea816972ddb46de60d5 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:43 +0900 Subject: pcmcia: Pass struct pcmcia_device to pcmcia_map_mem_page() No logic changes, just pass struct pcmcia_device to pcmcia_map_mem_page() [linux@dominikbrodowski.net: update to 2.6.31] CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: Jiri Kosina Acked-by: Karsten Keil (for ISDN) Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 91ca23d..10af1c0 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -125,7 +125,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, memreq_common_memory.CardOffset = cfg->mem.win[0].card_addr; memreq_common_memory.Page = 0; - ret = pcmcia_map_mem_page(ipw->handle_common_memory, + ret = pcmcia_map_mem_page(p_dev, ipw->handle_common_memory, &memreq_common_memory); if (ret != 0) @@ -154,7 +154,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, memreq_attr_memory.CardOffset = 0; memreq_attr_memory.Page = 0; - ret = pcmcia_map_mem_page(ipw->handle_attr_memory, + ret = pcmcia_map_mem_page(p_dev, ipw->handle_attr_memory, &memreq_attr_memory); if (ret != 0) diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c index 33d7530..598e17a 100644 --- a/drivers/isdn/hisax/sedlbauer_cs.c +++ b/drivers/isdn/hisax/sedlbauer_cs.c @@ -276,7 +276,7 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) return -ENODEV; } return 0; diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index 16d9985..80b9005 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -118,7 +118,7 @@ static caddr_t remap_window(struct map_info *map, unsigned long to) DEBUG(2, "Remapping window from 0x%8.8x to 0x%8.8x", dev->offset, mrq.CardOffset); mrq.Page = 0; - ret = pcmcia_map_mem_page(win, &mrq); + ret = pcmcia_map_mem_page(dev->p_dev, win, &mrq); if (ret != 0) return NULL; dev->offset = mrq.CardOffset; diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 61726c8..6b9c79e 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -574,7 +574,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) base = ioremap(req.Base, req.Size); mem.Page = 0; mem.CardOffset = 0; - pcmcia_map_mem_page(link->win, &mem); + pcmcia_map_mem_page(link, link->win, &mem); /* * MBH10304 CISTPL_FUNCE_LAN_NODE_ID format @@ -630,7 +630,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) mem.Page = 0; mem.CardOffset = 0; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if (i != 0) { iounmap(lp->base); lp->base = NULL; diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 7a985ca..6fc89eb 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -252,7 +252,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) mem.CardOffset = mmiobase; mem.Page = 0; - ret = pcmcia_map_mem_page(link->win, &mem); + ret = pcmcia_map_mem_page(link, link->win, &mem); if (ret) goto failed; ti->mmio = ioremap(req.Base, req.Size); @@ -269,7 +269,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) mem.CardOffset = srambase; mem.Page = 0; - ret = pcmcia_map_mem_page(info->sram_win_handle, &mem); + ret = pcmcia_map_mem_page(link, info->sram_win_handle, &mem); if (ret) goto failed; diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 4156a6c..75e6894 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -325,7 +325,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) mem.Page = 0; for (i = 0; i < NR_INFO; i++) { mem.CardOffset = hw_info[i].offset & ~(req.Size-1); - pcmcia_map_mem_page(link->win, &mem); + pcmcia_map_mem_page(link, link->win, &mem); base = &virt[hw_info[i].offset & (req.Size-1)]; if ((readb(base+0) == hw_info[i].a0) && (readb(base+2) == hw_info[i].a1) && @@ -1499,7 +1499,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, offset = mem.CardOffset % window_size; mem.CardOffset -= offset; mem.Page = 0; - ret = pcmcia_map_mem_page(link->win, &mem); + ret = pcmcia_map_mem_page(link, link->win, &mem); if (ret) goto failed; diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 580ec44..4ceaa45 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -480,7 +480,7 @@ static int mhz_mfc_config(struct pcmcia_device *link) mem.CardOffset = mem.Page = 0; if (smc->manfid == MANFID_MOTOROLA) mem.CardOffset = link->conf.ConfigBase; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if ((i == 0) && (smc->manfid == MANFID_MEGAHERTZ) diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index fbf9265..9bc4d60 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -932,7 +932,7 @@ xirc2ps_config(struct pcmcia_device * link) local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800; mem.CardOffset = 0x0; mem.Page = 0; - if ((err = pcmcia_map_mem_page(link->win, &mem))) + if ((err = pcmcia_map_mem_page(link, link->win, &mem))) goto config_error; /* Setup the CCRs; there are no infos in the CIS about the Ethernet diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index a1b84fc..2aa6978 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -261,7 +261,7 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) return -ENODEV; } /* If we got this far, we're cool! */ diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index cd14b7e..c0a34bb 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -93,7 +93,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) mem.CardOffset = 0; mem.Page = 0; - res = pcmcia_map_mem_page(dev->win, &mem); + res = pcmcia_map_mem_page(dev, dev->win, &mem); if (res != 0) goto err_disable; diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index cbd85de..4a8c621 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -763,7 +763,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { if (ret) goto failed; mem.CardOffset = 0x20000; mem.Page = 0; - ret = pcmcia_map_mem_page(link->win, &mem); + ret = pcmcia_map_mem_page(link, link->win, &mem); if (ret) goto failed; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index ed32875..f719ffc 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -443,7 +443,7 @@ static int ray_config(struct pcmcia_device *link) goto failed; mem.CardOffset = 0x0000; mem.Page = 0; - ret = pcmcia_map_mem_page(link->win, &mem); + ret = pcmcia_map_mem_page(link, link->win, &mem); if (ret) goto failed; local->sram = ioremap(req.Base, req.Size); @@ -459,7 +459,7 @@ static int ray_config(struct pcmcia_device *link) goto failed; mem.CardOffset = 0x8000; mem.Page = 0; - ret = pcmcia_map_mem_page(local->rmem_handle, &mem); + ret = pcmcia_map_mem_page(link, local->rmem_handle, &mem); if (ret) goto failed; local->rmem = ioremap(req.Base, req.Size); @@ -475,7 +475,7 @@ static int ray_config(struct pcmcia_device *link) goto failed; mem.CardOffset = 0x0000; mem.Page = 0; - ret = pcmcia_map_mem_page(local->amem_handle, &mem); + ret = pcmcia_map_mem_page(link, local->amem_handle, &mem); if (ret) goto failed; local->amem = ioremap(req.Base, req.Size); diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index 2fad4ac..d4df25d 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3885,7 +3885,7 @@ wv_pcmcia_config(struct pcmcia_device * link) dev->mem_end = dev->mem_start + req.Size; mem.CardOffset = 0; mem.Page = 0; - i = pcmcia_map_mem_page(link->win, &mem); + i = pcmcia_map_mem_page(link, link->win, &mem); if (i != 0) break; diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 820a6e5..b60952a 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -234,9 +234,12 @@ int pcmcia_get_mem_page(window_handle_t win, memreq_t *req) EXPORT_SYMBOL(pcmcia_get_mem_page); -int pcmcia_map_mem_page(window_handle_t win, memreq_t *req) +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh, + memreq_t *req) { struct pcmcia_socket *s; + window_handle_t win = wh; + if ((win == NULL) || (win->magic != WINDOW_MAGIC)) return -EINVAL; s = win->sock; diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index e32c344..c54108f 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1687,7 +1687,7 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev, if (pcmcia_request_window(&p_dev, &cfg_mem->req, &p_dev->win) != 0) goto next_entry; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map) != 0) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) goto next_entry; cfg_mem->data->MmioAddress = (unsigned long) ioremap_nocache(cfg_mem->req.Base, cfg_mem->req.Size); diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index e06d5b2..aeb85b3 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -616,7 +616,7 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map)) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) return -ENODEV; } /* If we got this far, we're cool! */ diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c index 9257a42..0968a67 100644 --- a/drivers/staging/comedi/drivers/ni_daq_dio24.c +++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c @@ -367,7 +367,7 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map)) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) return -ENODEV; } /* If we got this far, we're cool! */ diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c index 28ff4a6..b4c7dfa 100644 --- a/drivers/staging/comedi/drivers/ni_labpc_cs.c +++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c @@ -346,7 +346,7 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev, return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; - if (pcmcia_map_mem_page(p_dev->win, &map)) + if (pcmcia_map_mem_page(p_dev, p_dev->win, &map)) return -ENODEV; } /* If we got this far, we're cool! */ diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index 40b098d..f240bfa 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -204,7 +204,8 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); -int pcmcia_map_mem_page(window_handle_t win, memreq_t *req); +int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, + memreq_t *req); int pcmcia_modify_configuration(struct pcmcia_device *p_dev, modconf_t *mod); void pcmcia_disable_device(struct pcmcia_device *p_dev); -- cgit v0.10.2 From 16456ebabfec3f8f509fc18b45f256d066a1b360 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:48 +0900 Subject: pcmcia: Pass struct pcmcia_socket to pcmcia_get_mem_page() No logic changes, just pass struct pcmcia_socket to pcmcia_get_mem_page() [linux@dominikbrodowski.net: update to 2.6.31] Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 0a3ada9..8e09a49 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -227,6 +227,8 @@ extern void pcmcia_put_dev(struct pcmcia_device *p_dev); struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, unsigned int function); +int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t win, + memreq_t *req); /* pcmcia_ioctl.c */ extern void __init pcmcia_setup_ioctl(void); diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 056fd13..c829ead 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -924,7 +924,7 @@ static int ds_ioctl(struct inode * inode, struct file * file, buf->win_info.handle->index + 1, &buf->win_info.window); break; case DS_GET_MEM_PAGE: - ret = pcmcia_get_mem_page(buf->win_info.handle, + ret = pcmcia_get_mem_page(s, buf->win_info.handle, &buf->win_info.map); break; case DS_REPLACE_CIS: diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index b60952a..a092749 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -223,8 +223,11 @@ EXPORT_SYMBOL(pcmcia_get_window); * * Change the card address of an already open memory window. */ -int pcmcia_get_mem_page(window_handle_t win, memreq_t *req) +int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, + memreq_t *req) { + window_handle_t win = wh; + if ((win == NULL) || (win->magic != WINDOW_MAGIC)) return -EINVAL; req->Page = 0; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index f240bfa..cbf5f05 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -202,8 +202,6 @@ int pcmcia_request_configuration(struct pcmcia_device *p_dev, int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh); int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); - -int pcmcia_get_mem_page(window_handle_t win, memreq_t *req); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, memreq_t *req); -- cgit v0.10.2 From 0bdf9b3dd3cfa5cbd5d55172c19f5dd166208e17 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Dec 2006 19:46:53 +0900 Subject: pcmcia: Change window_handle_t logic to unsigned long Logic changes based on top of the other patches: This set of patches changed window_handle_t from being a pointer to an unsigned long. The unsigned long is now a simple index into socket->win[]. Going from a pointer to unsigned long should leave the user space interface unchanged unless I'm mistaken. This change results in code that is less error prone and a user space interface which is much cleaner and safer. A nice side effect is that we are also are able to remove all members except one from window_t. [ linux@dominikbrodowski.net: Update to 2.6.31. Also, a plain "index" to socket->win[] does not work, as several codepaths rely on "window_handle_t" being non-zero if used. Therefore, set the window_handle_t to the socket->win[] index + 1. ] CC: netdev@vger.kernel.org Signed-off-by: Magnus Damm Signed-off-by: Dominik Brodowski diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 75e6894..518b094 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -1514,7 +1514,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, if (i != (TX_PAGES<<8)) { iounmap(info->base); pcmcia_release_window(link, link->win); - info->base = NULL; link->win = NULL; + info->base = NULL; link->win = 0; goto failed; } diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 8e09a49..6df41de 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -149,8 +149,8 @@ extern struct list_head pcmcia_socket_list; extern struct class pcmcia_socket_class; int pcmcia_get_window(struct pcmcia_socket *s, - window_handle_t *handle, - int idx, + window_handle_t *wh_out, + window_handle_t wh, win_req_t *req); int pccard_register_pcmcia(struct pcmcia_socket *s, struct pcmcia_callback *c); struct pcmcia_socket *pcmcia_get_socket_by_nr(unsigned int nr); diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index c829ead..6245fde 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -916,12 +916,12 @@ static int ds_ioctl(struct inode * inode, struct file * file, goto free_out; break; case DS_GET_FIRST_WINDOW: - ret = pcmcia_get_window(s, &buf->win_info.handle, 0, + ret = pcmcia_get_window(s, &buf->win_info.handle, 1, &buf->win_info.window); break; case DS_GET_NEXT_WINDOW: ret = pcmcia_get_window(s, &buf->win_info.handle, - buf->win_info.handle->index + 1, &buf->win_info.window); + buf->win_info.handle + 1, &buf->win_info.window); break; case DS_GET_MEM_PAGE: ret = pcmcia_get_mem_page(s, buf->win_info.handle, diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index a092749..ae6abc7 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -187,15 +187,19 @@ EXPORT_SYMBOL(pcmcia_access_configuration_register); /** pcmcia_get_window */ -int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *handle, - int idx, win_req_t *req) +int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, + window_handle_t wh, win_req_t *req) { window_t *win; - int w; + window_handle_t w; if (!s || !(s->state & SOCKET_PRESENT)) return -ENODEV; - for (w = idx; w < MAX_WIN; w++) + + wh--; + if (wh >= MAX_WIN) + return -EINVAL; + for (w = wh; w < MAX_WIN; w++) if (s->state & SOCKET_WIN_REQ(w)) break; if (w == MAX_WIN) @@ -213,7 +217,8 @@ int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *handle, req->Attributes |= WIN_DATA_WIDTH_16; if (win->ctl.flags & MAP_USE_WAIT) req->Attributes |= WIN_USE_WAIT; - *handle = win; + + *wh_out = w++; return 0; } /* pcmcia_get_window */ EXPORT_SYMBOL(pcmcia_get_window); @@ -226,12 +231,12 @@ EXPORT_SYMBOL(pcmcia_get_window); int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, memreq_t *req) { - window_handle_t win = wh; - - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) + wh--; + if (wh >= MAX_WIN) return -EINVAL; + req->Page = 0; - req->CardOffset = win->ctl.card_start; + req->CardOffset = skt->win[wh].ctl.card_start; return 0; } /* pcmcia_get_mem_page */ EXPORT_SYMBOL(pcmcia_get_mem_page); @@ -240,18 +245,17 @@ EXPORT_SYMBOL(pcmcia_get_mem_page); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh, memreq_t *req) { - struct pcmcia_socket *s; - window_handle_t win = wh; + struct pcmcia_socket *s = p_dev->socket; - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) + wh--; + if (wh >= MAX_WIN) return -EINVAL; - s = win->sock; if (req->Page != 0) { dev_dbg(&s->dev, "failure: requested page is zero\n"); return -EINVAL; } - win->ctl.card_start = req->CardOffset; - if (s->ops->set_mem_map(s, &win->ctl) != 0) { + s->win[wh].ctl.card_start = req->CardOffset; + if (s->ops->set_mem_map(s, &s->win[wh].ctl) != 0) { dev_dbg(&s->dev, "failed to set_mem_map\n"); return -EIO; } @@ -450,13 +454,16 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) { - struct pcmcia_socket *s; - window_handle_t win = wh; + struct pcmcia_socket *s = p_dev->socket; + window_t *win; - if ((win == NULL) || (win->magic != WINDOW_MAGIC)) + wh--; + if (wh >= MAX_WIN) return -EINVAL; - s = win->sock; - if (!(win->handle->_win & CLIENT_WIN_REQ(win->index))) { + + win = &s->win[wh]; + + if (!(p_dev->_win & CLIENT_WIN_REQ(wh))) { dev_dbg(&s->dev, "not releasing unknown window\n"); return -EINVAL; } @@ -464,7 +471,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) /* Shut down memory window */ win->ctl.flags &= ~MAP_ACTIVE; s->ops->set_mem_map(s, &win->ctl); - s->state &= ~SOCKET_WIN_REQ(win->index); + s->state &= ~SOCKET_WIN_REQ(wh); /* Release system memory */ if (win->ctl.res) { @@ -472,9 +479,7 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) kfree(win->ctl.res); win->ctl.res = NULL; } - win->handle->_win &= ~CLIENT_WIN_REQ(win->index); - - win->magic = 0; + p_dev->_win &= ~CLIENT_WIN_REQ(wh); return 0; } /* pcmcia_release_window */ @@ -847,10 +852,6 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h } win = &s->win[w]; - win->magic = WINDOW_MAGIC; - win->index = w; - win->handle = *p_dev; - win->sock = s; if (!(s->features & SS_CAP_STATIC_MAP)) { win->ctl.res = pcmcia_find_mem_region(req->Base, req->Size, align, @@ -887,7 +888,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h } else { req->Base = win->ctl.res->start; } - *wh = win; + *wh = w + 1; return 0; } /* pcmcia_request_window */ diff --git a/include/pcmcia/cs_types.h b/include/pcmcia/cs_types.h index 315965a..f5e3b83 100644 --- a/include/pcmcia/cs_types.h +++ b/include/pcmcia/cs_types.h @@ -26,8 +26,7 @@ typedef u_int event_t; typedef u_char cisdata_t; typedef u_short page_t; -struct window_t; -typedef struct window_t *window_handle_t; +typedef unsigned long window_handle_t; struct region_t; typedef struct region_t *memory_handle_t; diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 753da9b..6301c3f 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -107,12 +107,7 @@ typedef struct io_window_t { struct resource *res; } io_window_t; -#define WINDOW_MAGIC 0xB35C typedef struct window_t { - u_short magic; - u_short index; - struct pcmcia_device *handle; - struct pcmcia_socket *sock; pccard_mem_map ctl; } window_t; -- cgit v0.10.2 From d7b0364bfc71c4abc97dfc47f85bb32363266e4e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 01:05:33 +0100 Subject: pcmcia: move some window-related code to pcmcia_ioctl.c pcmcia_get_window() and pcmcia_get_mem_page() were only called from pcmcia_ioctl.c. Therefore, move these functions to that file, and remove the useless EXPORTs. Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/cs_internal.h b/drivers/pcmcia/cs_internal.h index 6df41de..3bc02d5 100644 --- a/drivers/pcmcia/cs_internal.h +++ b/drivers/pcmcia/cs_internal.h @@ -148,10 +148,6 @@ extern struct rw_semaphore pcmcia_socket_list_rwsem; extern struct list_head pcmcia_socket_list; extern struct class pcmcia_socket_class; -int pcmcia_get_window(struct pcmcia_socket *s, - window_handle_t *wh_out, - window_handle_t wh, - win_req_t *req); int pccard_register_pcmcia(struct pcmcia_socket *s, struct pcmcia_callback *c); struct pcmcia_socket *pcmcia_get_socket_by_nr(unsigned int nr); @@ -227,8 +223,6 @@ extern void pcmcia_put_dev(struct pcmcia_device *p_dev); struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, unsigned int function); -int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t win, - memreq_t *req); /* pcmcia_ioctl.c */ extern void __init pcmcia_setup_ioctl(void); diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 6245fde..38b3a26 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -218,6 +218,61 @@ static int pcmcia_adjust_resource_info(adjust_t *adj) return (ret); } + +/** pcmcia_get_window + */ +static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, + window_handle_t wh, win_req_t *req) +{ + window_t *win; + window_handle_t w; + + wh--; + if (!s || !(s->state & SOCKET_PRESENT)) + return -ENODEV; + if (wh >= MAX_WIN) + return -EINVAL; + for (w = wh; w < MAX_WIN; w++) + if (s->state & SOCKET_WIN_REQ(w)) + break; + if (w == MAX_WIN) + return -EINVAL; + win = &s->win[w]; + req->Base = win->ctl.res->start; + req->Size = win->ctl.res->end - win->ctl.res->start + 1; + req->AccessSpeed = win->ctl.speed; + req->Attributes = 0; + if (win->ctl.flags & MAP_ATTRIB) + req->Attributes |= WIN_MEMORY_TYPE_AM; + if (win->ctl.flags & MAP_ACTIVE) + req->Attributes |= WIN_ENABLE; + if (win->ctl.flags & MAP_16BIT) + req->Attributes |= WIN_DATA_WIDTH_16; + if (win->ctl.flags & MAP_USE_WAIT) + req->Attributes |= WIN_USE_WAIT; + + *wh_out = w + 1; + return 0; +} /* pcmcia_get_window */ + + +/** pcmcia_get_mem_page + * + * Change the card address of an already open memory window. + */ +static int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, + memreq_t *req) +{ + wh--; + if (wh >= MAX_WIN) + return -EINVAL; + + req->Page = 0; + req->CardOffset = skt->win[wh].ctl.card_start; + return 0; +} /* pcmcia_get_mem_page */ + + /** pccard_get_status * * Get the current socket state bits. We don't support the latched diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index ae6abc7..ae68b26 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -185,63 +185,6 @@ int pcmcia_access_configuration_register(struct pcmcia_device *p_dev, EXPORT_SYMBOL(pcmcia_access_configuration_register); -/** pcmcia_get_window - */ -int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, - window_handle_t wh, win_req_t *req) -{ - window_t *win; - window_handle_t w; - - if (!s || !(s->state & SOCKET_PRESENT)) - return -ENODEV; - - wh--; - if (wh >= MAX_WIN) - return -EINVAL; - for (w = wh; w < MAX_WIN; w++) - if (s->state & SOCKET_WIN_REQ(w)) - break; - if (w == MAX_WIN) - return -EINVAL; - win = &s->win[w]; - req->Base = win->ctl.res->start; - req->Size = win->ctl.res->end - win->ctl.res->start + 1; - req->AccessSpeed = win->ctl.speed; - req->Attributes = 0; - if (win->ctl.flags & MAP_ATTRIB) - req->Attributes |= WIN_MEMORY_TYPE_AM; - if (win->ctl.flags & MAP_ACTIVE) - req->Attributes |= WIN_ENABLE; - if (win->ctl.flags & MAP_16BIT) - req->Attributes |= WIN_DATA_WIDTH_16; - if (win->ctl.flags & MAP_USE_WAIT) - req->Attributes |= WIN_USE_WAIT; - - *wh_out = w++; - return 0; -} /* pcmcia_get_window */ -EXPORT_SYMBOL(pcmcia_get_window); - - -/** pcmcia_get_mem_page - * - * Change the card address of an already open memory window. - */ -int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, - memreq_t *req) -{ - wh--; - if (wh >= MAX_WIN) - return -EINVAL; - - req->Page = 0; - req->CardOffset = skt->win[wh].ctl.card_start; - return 0; -} /* pcmcia_get_mem_page */ -EXPORT_SYMBOL(pcmcia_get_mem_page); - - int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh, memreq_t *req) { -- cgit v0.10.2 From 82f88e36004162f49a9340ffbbaebe89016e4835 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 01:16:12 +0100 Subject: pcmcia: remove unused "window_t" typedef Signed-off-by: Dominik Brodowski diff --git a/drivers/pcmcia/pcmcia_ioctl.c b/drivers/pcmcia/pcmcia_ioctl.c index 38b3a26..c4d7908 100644 --- a/drivers/pcmcia/pcmcia_ioctl.c +++ b/drivers/pcmcia/pcmcia_ioctl.c @@ -224,7 +224,7 @@ static int pcmcia_adjust_resource_info(adjust_t *adj) static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, window_handle_t wh, win_req_t *req) { - window_t *win; + pccard_mem_map *win; window_handle_t w; wh--; @@ -238,17 +238,17 @@ static int pcmcia_get_window(struct pcmcia_socket *s, window_handle_t *wh_out, if (w == MAX_WIN) return -EINVAL; win = &s->win[w]; - req->Base = win->ctl.res->start; - req->Size = win->ctl.res->end - win->ctl.res->start + 1; - req->AccessSpeed = win->ctl.speed; + req->Base = win->res->start; + req->Size = win->res->end - win->res->start + 1; + req->AccessSpeed = win->speed; req->Attributes = 0; - if (win->ctl.flags & MAP_ATTRIB) + if (win->flags & MAP_ATTRIB) req->Attributes |= WIN_MEMORY_TYPE_AM; - if (win->ctl.flags & MAP_ACTIVE) + if (win->flags & MAP_ACTIVE) req->Attributes |= WIN_ENABLE; - if (win->ctl.flags & MAP_16BIT) + if (win->flags & MAP_16BIT) req->Attributes |= WIN_DATA_WIDTH_16; - if (win->ctl.flags & MAP_USE_WAIT) + if (win->flags & MAP_USE_WAIT) req->Attributes |= WIN_USE_WAIT; *wh_out = w + 1; @@ -268,7 +268,7 @@ static int pcmcia_get_mem_page(struct pcmcia_socket *skt, window_handle_t wh, return -EINVAL; req->Page = 0; - req->CardOffset = skt->win[wh].ctl.card_start; + req->CardOffset = skt->win[wh].card_start; return 0; } /* pcmcia_get_mem_page */ diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index ae68b26..5046854 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -197,8 +197,8 @@ int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t wh, dev_dbg(&s->dev, "failure: requested page is zero\n"); return -EINVAL; } - s->win[wh].ctl.card_start = req->CardOffset; - if (s->ops->set_mem_map(s, &s->win[wh].ctl) != 0) { + s->win[wh].card_start = req->CardOffset; + if (s->ops->set_mem_map(s, &s->win[wh]) != 0) { dev_dbg(&s->dev, "failed to set_mem_map\n"); return -EIO; } @@ -398,7 +398,7 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) { struct pcmcia_socket *s = p_dev->socket; - window_t *win; + pccard_mem_map *win; wh--; if (wh >= MAX_WIN) @@ -412,15 +412,15 @@ int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t wh) } /* Shut down memory window */ - win->ctl.flags &= ~MAP_ACTIVE; - s->ops->set_mem_map(s, &win->ctl); + win->flags &= ~MAP_ACTIVE; + s->ops->set_mem_map(s, win); s->state &= ~SOCKET_WIN_REQ(wh); /* Release system memory */ - if (win->ctl.res) { - release_resource(win->ctl.res); - kfree(win->ctl.res); - win->ctl.res = NULL; + if (win->res) { + release_resource(win->res); + kfree(win->res); + win->res = NULL; } p_dev->_win &= ~CLIENT_WIN_REQ(wh); @@ -755,7 +755,7 @@ EXPORT_SYMBOL(pcmcia_request_irq); int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh) { struct pcmcia_socket *s = (*p_dev)->socket; - window_t *win; + pccard_mem_map *win; u_long align; int w; @@ -797,9 +797,9 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h win = &s->win[w]; if (!(s->features & SS_CAP_STATIC_MAP)) { - win->ctl.res = pcmcia_find_mem_region(req->Base, req->Size, align, + win->res = pcmcia_find_mem_region(req->Base, req->Size, align, (req->Attributes & WIN_MAP_BELOW_1MB), s); - if (!win->ctl.res) { + if (!win->res) { dev_dbg(&s->dev, "allocating mem region failed\n"); return -EINVAL; } @@ -807,19 +807,19 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h (*p_dev)->_win |= CLIENT_WIN_REQ(w); /* Configure the socket controller */ - win->ctl.map = w+1; - win->ctl.flags = 0; - win->ctl.speed = req->AccessSpeed; + win->map = w+1; + win->flags = 0; + win->speed = req->AccessSpeed; if (req->Attributes & WIN_MEMORY_TYPE) - win->ctl.flags |= MAP_ATTRIB; + win->flags |= MAP_ATTRIB; if (req->Attributes & WIN_ENABLE) - win->ctl.flags |= MAP_ACTIVE; + win->flags |= MAP_ACTIVE; if (req->Attributes & WIN_DATA_WIDTH_16) - win->ctl.flags |= MAP_16BIT; + win->flags |= MAP_16BIT; if (req->Attributes & WIN_USE_WAIT) - win->ctl.flags |= MAP_USE_WAIT; - win->ctl.card_start = 0; - if (s->ops->set_mem_map(s, &win->ctl) != 0) { + win->flags |= MAP_USE_WAIT; + win->card_start = 0; + if (s->ops->set_mem_map(s, win) != 0) { dev_dbg(&s->dev, "failed to set memory mapping\n"); return -EIO; } @@ -827,9 +827,9 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h /* Return window handle */ if (s->features & SS_CAP_STATIC_MAP) { - req->Base = win->ctl.static_start; + req->Base = win->static_start; } else { - req->Base = win->ctl.res->start; + req->Base = win->res->start; } *wh = w + 1; diff --git a/include/pcmcia/ss.h b/include/pcmcia/ss.h index 6301c3f..d85f725 100644 --- a/include/pcmcia/ss.h +++ b/include/pcmcia/ss.h @@ -107,10 +107,6 @@ typedef struct io_window_t { struct resource *res; } io_window_t; -typedef struct window_t { - pccard_mem_map ctl; -} window_t; - /* Maximum number of IO windows per socket */ #define MAX_IO_WIN 2 @@ -150,7 +146,7 @@ struct pcmcia_socket { u_int Config; } irq; io_window_t io[MAX_IO_WIN]; - window_t win[MAX_WIN]; + pccard_mem_map win[MAX_WIN]; struct list_head cis_cache; size_t fake_cis_len; u8 *fake_cis; -- cgit v0.10.2 From 6838b03fc6564ea07d0cd87ea6e198d90ab1fc3e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 01:31:52 +0100 Subject: pcmcia: pcmcia_request_window() doesn't need a pointer to a pointer pcmcia_request_window() only needs a pointer to struct pcmcia_device, not a pointer to a pointer. CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: Jiri Kosina Acked-by: Karsten Keil (for ISDN) Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 10af1c0..082146a 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -116,7 +116,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, ipw->request_common_memory.Size = 0x1000; ipw->request_common_memory.AccessSpeed = 0; - ret = pcmcia_request_window(&p_dev, &ipw->request_common_memory, + ret = pcmcia_request_window(p_dev, &ipw->request_common_memory, &ipw->handle_common_memory); if (ret != 0) @@ -145,7 +145,7 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, ipw->request_attr_memory.Size = 0; /* this used to be 0x1000 */ ipw->request_attr_memory.AccessSpeed = 0; - ret = pcmcia_request_window(&p_dev, &ipw->request_attr_memory, + ret = pcmcia_request_window(p_dev, &ipw->request_attr_memory, &ipw->handle_attr_memory); if (ret != 0) diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c index 598e17a..331716f 100644 --- a/drivers/isdn/hisax/sedlbauer_cs.c +++ b/drivers/isdn/hisax/sedlbauer_cs.c @@ -272,7 +272,7 @@ static int sedlbauer_config_check(struct pcmcia_device *p_dev, req->Base = mem->win[0].host_addr; req->Size = mem->win[0].len; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/drivers/mtd/maps/pcmciamtd.c b/drivers/mtd/maps/pcmciamtd.c index 80b9005..689d6a7 100644 --- a/drivers/mtd/maps/pcmciamtd.c +++ b/drivers/mtd/maps/pcmciamtd.c @@ -530,7 +530,7 @@ static int pcmciamtd_config(struct pcmcia_device *link) int ret; DEBUG(2, "requesting window with size = %dKiB memspeed = %d", req.Size >> 10, req.AccessSpeed); - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); DEBUG(2, "ret = %d dev->win_size = %d", ret, dev->win_size); if(ret) { req.Size >>= 1; diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 6b9c79e..85f7c45 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -567,7 +567,7 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) return -1; @@ -618,7 +618,7 @@ static int fmvj18x_setup_mfc(struct pcmcia_device *link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) return -1; diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 6fc89eb..14fe5fa 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -246,7 +246,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x2000; req.AccessSpeed = 250; - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); if (ret) goto failed; @@ -263,7 +263,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) req.Base = 0; req.Size = sramsize * 1024; req.AccessSpeed = 250; - ret = pcmcia_request_window(&link, &req, &info->sram_win_handle); + ret = pcmcia_request_window(link, &req, &info->sram_win_handle); if (ret) goto failed; diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 518b094..d06a0ce 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -317,7 +317,7 @@ static hw_info_t *get_hwinfo(struct pcmcia_device *link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = 0; req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) return NULL; @@ -1491,7 +1491,7 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, req.Attributes |= WIN_USE_WAIT; req.Base = 0; req.Size = window_size; req.AccessSpeed = mem_speed; - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); if (ret) goto failed; diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 4ceaa45..c6ca376 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -472,7 +472,7 @@ static int mhz_mfc_config(struct pcmcia_device *link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = 0; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) return -ENODEV; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 9bc4d60..265852a 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -926,7 +926,7 @@ xirc2ps_config(struct pcmcia_device * link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = 0; - if ((err = pcmcia_request_window(&link, &req, &link->win))) + if ((err = pcmcia_request_window(link, &req, &link->win))) goto config_error; local->dingo_ccr = ioremap(req.Base,0x1000) + 0x0800; diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index 2aa6978..34d775c 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -257,7 +257,7 @@ static int airo_cs_config_check(struct pcmcia_device *p_dev, req->Base = mem->win[0].host_addr; req->Size = mem->win[0].len; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, req, &p_dev->win) != 0) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index c0a34bb..2588358 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -87,7 +87,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) win.Base = 0; win.Size = SSB_CORE_SIZE; win.AccessSpeed = 250; - res = pcmcia_request_window(&dev, &win, &dev->win); + res = pcmcia_request_window(dev, &win, &dev->win); if (res != 0) goto err_kfree_ssb; diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index 4a8c621..bd4eff7 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -759,7 +759,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_CM|WIN_ENABLE; req.Base = 0; req.Size = 0x8000; req.AccessSpeed = mem_speed; - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); if (ret) goto failed; mem.CardOffset = 0x20000; mem.Page = 0; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index f719ffc..66e2d10 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -438,7 +438,7 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x8000; req.AccessSpeed = ray_mem_speed; - ret = pcmcia_request_window(&link, &req, &link->win); + ret = pcmcia_request_window(link, &req, &link->win); if (ret) goto failed; mem.CardOffset = 0x0000; @@ -454,7 +454,7 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x4000; req.AccessSpeed = ray_mem_speed; - ret = pcmcia_request_window(&link, &req, &local->rmem_handle); + ret = pcmcia_request_window(link, &req, &local->rmem_handle); if (ret) goto failed; mem.CardOffset = 0x8000; @@ -470,7 +470,7 @@ static int ray_config(struct pcmcia_device *link) req.Base = 0; req.Size = 0x1000; req.AccessSpeed = ray_mem_speed; - ret = pcmcia_request_window(&link, &req, &local->amem_handle); + ret = pcmcia_request_window(link, &req, &local->amem_handle); if (ret) goto failed; mem.CardOffset = 0x0000; diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index d4df25d..f8c5166 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3876,7 +3876,7 @@ wv_pcmcia_config(struct pcmcia_device * link) req.Attributes = WIN_DATA_WIDTH_8|WIN_MEMORY_TYPE_AM|WIN_ENABLE; req.Base = req.Size = 0; req.AccessSpeed = mem_speed; - i = pcmcia_request_window(&link, &req, &link->win); + i = pcmcia_request_window(link, &req, &link->win); if (i != 0) break; diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 5046854..1cf7d54 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -752,9 +752,9 @@ EXPORT_SYMBOL(pcmcia_request_irq); * Request_window() establishes a mapping between card memory space * and system memory space. */ -int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_handle_t *wh) +int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_handle_t *wh) { - struct pcmcia_socket *s = (*p_dev)->socket; + struct pcmcia_socket *s = p_dev->socket; pccard_mem_map *win; u_long align; int w; @@ -804,7 +804,7 @@ int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, window_h return -EINVAL; } } - (*p_dev)->_win |= CLIENT_WIN_REQ(w); + p_dev->_win |= CLIENT_WIN_REQ(w); /* Configure the socket controller */ win->map = w+1; diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index c54108f..9dfd6f5 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1684,7 +1684,7 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev, if (cfg_mem->req.Size < 0x1000) cfg_mem->req.Size = 0x1000; cfg_mem->req.AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, &cfg_mem->req, &p_dev->win) != 0) + if (pcmcia_request_window(p_dev, &cfg_mem->req, &p_dev->win) != 0) goto next_entry; map.Page = 0; map.CardOffset = mem->win[0].card_addr; if (pcmcia_map_mem_page(p_dev, p_dev->win, &map) != 0) diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index aeb85b3..7328a84 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -612,7 +612,7 @@ static int dio700_pcmcia_config_loop(struct pcmcia_device *p_dev, if (req->Size < 0x1000) req->Size = 0x1000; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + if (pcmcia_request_window(p_dev, req, &p_dev->win)) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c index 0968a67..5056315 100644 --- a/drivers/staging/comedi/drivers/ni_daq_dio24.c +++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c @@ -363,7 +363,7 @@ static int dio24_pcmcia_config_loop(struct pcmcia_device *p_dev, if (req->Size < 0x1000) req->Size = 0x1000; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + if (pcmcia_request_window(p_dev, req, &p_dev->win)) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c index b4c7dfa..7fb5058 100644 --- a/drivers/staging/comedi/drivers/ni_labpc_cs.c +++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c @@ -342,7 +342,7 @@ static int labpc_pcmcia_config_loop(struct pcmcia_device *p_dev, if (req->Size < 0x1000) req->Size = 0x1000; req->AccessSpeed = 0; - if (pcmcia_request_window(&p_dev, req, &p_dev->win)) + if (pcmcia_request_window(p_dev, req, &p_dev->win)) return -ENODEV; map.Page = 0; map.CardOffset = mem->win[0].card_addr; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index cbf5f05..d6c55fd 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -199,7 +199,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req); int pcmcia_request_configuration(struct pcmcia_device *p_dev, config_req_t *req); -int pcmcia_request_window(struct pcmcia_device **p_dev, win_req_t *req, +int pcmcia_request_window(struct pcmcia_device *p_dev, win_req_t *req, window_handle_t *wh); int pcmcia_release_window(struct pcmcia_device *p_dev, window_handle_t win); int pcmcia_map_mem_page(struct pcmcia_device *p_dev, window_handle_t win, -- cgit v0.10.2 From dd2e5a156525f11754d9b1e0583f6bb49c253d62 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 3 Nov 2009 10:27:34 +0100 Subject: pcmcia: remove deprecated handle_to_dev() macro Update remaining users and remove deprecated handle_to_dev() macro CC: Harald Welte CC: netdev@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-serial@vger.kernel.org Signed-off-by: Dominik Brodowski diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c index 1611c4f..2db4c0a 100644 --- a/drivers/char/pcmcia/cm4000_cs.c +++ b/drivers/char/pcmcia/cm4000_cs.c @@ -45,7 +45,7 @@ /* #define ATR_CSUM */ -#define reader_to_dev(x) (&handle_to_dev(x->p_dev)) +#define reader_to_dev(x) (&x->p_dev->dev) /* n (debug level) is ignored */ /* additional debug output may be enabled by re-compiling with diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c index 38790db..a6a70e4 100644 --- a/drivers/char/pcmcia/cm4040_cs.c +++ b/drivers/char/pcmcia/cm4040_cs.c @@ -39,7 +39,7 @@ #include "cm4040_cs.h" -#define reader_to_dev(x) (&handle_to_dev(x->p_dev)) +#define reader_to_dev(x) (&x->p_dev->dev) /* n (debug level) is ignored */ /* additional debug output may be enabled by re-compiling with @@ -539,7 +539,7 @@ static int cm4040_config_check(struct pcmcia_device *p_dev, p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; rc = pcmcia_request_io(p_dev, &p_dev->io); - dev_printk(KERN_INFO, &handle_to_dev(p_dev), + dev_printk(KERN_INFO, &p_dev->dev, "pcmcia_request_io returned 0x%x\n", rc); return rc; } @@ -561,7 +561,7 @@ static int reader_config(struct pcmcia_device *link, int devno) fail_rc = pcmcia_request_configuration(link, &link->conf); if (fail_rc != 0) { - dev_printk(KERN_INFO, &handle_to_dev(link), + dev_printk(KERN_INFO, &link->dev, "pcmcia_request_configuration failed 0x%x\n", fail_rc); goto cs_release; diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index dbef5d9..8b65e18 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -449,7 +449,7 @@ static int tc574_config(struct pcmcia_device *link) } link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "3c574_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index 6eac62b..c43c21d 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -316,7 +316,7 @@ static int tc589_config(struct pcmcia_device *link) printk(KERN_ERR "3c589_cs: invalid if_port requested\n"); link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_ERR "3c589_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 5af2ccf..0552ddd 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -400,7 +400,7 @@ static int axnet_config(struct pcmcia_device *link) info->phy_id = (i < 32) ? i : -1; link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "axnet_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c index 9a2e500..51e9cb0 100644 --- a/drivers/net/pcmcia/com20020_cs.c +++ b/drivers/net/pcmcia/com20020_cs.c @@ -302,7 +302,7 @@ static int com20020_config(struct pcmcia_device *link) lp->card_flags = ARC_CAN_10MBIT; /* pretend all of them can 10Mbit */ link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = com20020_found(dev, 0); /* calls register_netdev */ diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 85f7c45..9b5ca37c 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -532,7 +532,7 @@ static int fmvj18x_config(struct pcmcia_device *link) lp->cardtype = cardtype; link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "fmvj18x_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 14fe5fa..76706e1 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -287,7 +287,7 @@ static int __devinit ibmtr_config(struct pcmcia_device *link) ibmtr_hw_setup(dev, mmiobase); link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = ibmtr_probe_card(dev); if (i != 0) { diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index a536348..2d0c6f9 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -701,7 +701,7 @@ static int nmclan_config(struct pcmcia_device *link) printk(KERN_NOTICE "nmclan_cs: invalid if_port requested\n"); link->dev_node = &lp->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if (i != 0) { diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index d06a0ce..30baee7 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -646,7 +646,7 @@ static int pcnet_config(struct pcmcia_device *link) mii_phy_probe(dev); link->dev_node = &info->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_NOTICE "pcnet_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index c6ca376..2e795de 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -964,7 +964,7 @@ static int smc91c92_config(struct pcmcia_device *link) } link->dev_node = &smc->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_ERR "smc91c92_cs: register_netdev() failed\n"); diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index 265852a..f75ac71 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -992,7 +992,7 @@ xirc2ps_config(struct pcmcia_device * link) do_reset(dev, 1); /* a kludge to make the cem56 work */ link->dev_node = &local->node; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if ((err=register_netdev(dev))) { printk(KNOT_XIRC "register_netdev() failed\n"); diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index 34d775c..7d3a96f 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -322,7 +322,7 @@ static int airo_config(struct pcmcia_device *link) goto failed; ((local_info_t *)link->priv)->eth_dev = init_airo_card(link->irq.AssignedIRQ, - link->io.BasePort1, 1, &handle_to_dev(link)); + link->io.BasePort1, 1, &link->dev); if (!((local_info_t *)link->priv)->eth_dev) goto failed; diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c index 7838572..18a58b8 100644 --- a/drivers/net/wireless/atmel_cs.c +++ b/drivers/net/wireless/atmel_cs.c @@ -260,7 +260,7 @@ static int atmel_config(struct pcmcia_device *link) struct pcmcia_device_id *did; dev = link->priv; - did = dev_get_drvdata(&handle_to_dev(link)); + did = dev_get_drvdata(&link->dev); dev_dbg(&link->dev, "atmel_config\n"); @@ -309,7 +309,7 @@ static int atmel_config(struct pcmcia_device *link) init_atmel_card(link->irq.AssignedIRQ, link->io.BasePort1, did ? did->driver_info : ATMEL_FW_TYPE_NONE, - &handle_to_dev(link), + &link->dev, card_present, link); if (!((local_info_t*)link->priv)->eth_dev) diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index ca3ab84..243e912 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -623,7 +623,7 @@ static int prism2_config(struct pcmcia_device *link) /* Need to allocate net_device before requesting IRQ handler */ dev = prism2_init_local_data(&prism2_pccard_funcs, 0, - &handle_to_dev(link)); + &link->dev); if (dev == NULL) goto failed; link->priv = dev; diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c index cb40c38..f2b1655 100644 --- a/drivers/net/wireless/libertas/if_cs.c +++ b/drivers/net/wireless/libertas/if_cs.c @@ -590,7 +590,7 @@ static int if_cs_prog_helper(struct if_cs_card *card) /* TODO: make firmware file configurable */ ret = request_firmware(&fw, "libertas_cs_helper.fw", - &handle_to_dev(card->p_dev)); + &card->p_dev->dev); if (ret) { lbs_pr_err("can't load helper firmware\n"); ret = -ENODEV; @@ -663,7 +663,7 @@ static int if_cs_prog_real(struct if_cs_card *card) /* TODO: make firmware file configurable */ ret = request_firmware(&fw, "libertas_cs.fw", - &handle_to_dev(card->p_dev)); + &card->p_dev->dev); if (ret) { lbs_pr_err("can't load firmware\n"); ret = -ENODEV; diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index bd4eff7..f5333b7 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -773,7 +773,7 @@ static int netwave_pcmcia_config(struct pcmcia_device *link) { dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev) != 0) { printk(KERN_DEBUG "netwave_cs: register_netdev() failed\n"); diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index da626ec..688b398 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -109,7 +109,7 @@ orinoco_cs_probe(struct pcmcia_device *link) struct orinoco_private *priv; struct orinoco_pccard *card; - priv = alloc_orinocodev(sizeof(*card), &handle_to_dev(link), + priv = alloc_orinocodev(sizeof(*card), &link->dev, orinoco_cs_hard_reset, NULL); if (!priv) return -ENOMEM; diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c index 700010e..c609371 100644 --- a/drivers/net/wireless/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/orinoco/spectrum_cs.c @@ -182,7 +182,7 @@ spectrum_cs_probe(struct pcmcia_device *link) struct orinoco_private *priv; struct orinoco_pccard *card; - priv = alloc_orinocodev(sizeof(*card), &handle_to_dev(link), + priv = alloc_orinocodev(sizeof(*card), &link->dev, spectrum_cs_hard_reset, spectrum_cs_stop_firmware); if (!priv) diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 66e2d10..5e0f4c3 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -488,7 +488,7 @@ static int ray_config(struct pcmcia_device *link) return -ENODEV; } - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if (i != 0) { printk("ray_config register_netdev() failed\n"); diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index f8c5166..df3579a 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3899,7 +3899,7 @@ wv_pcmcia_config(struct pcmcia_device * link) lp->mem, dev->irq, (u_int) dev->base_addr); #endif - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); i = register_netdev(dev); if(i != 0) { diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 7e8e269..9a956c7 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1975,7 +1975,7 @@ static int wl3501_config(struct pcmcia_device *link) dev->irq = link->irq.AssignedIRQ; dev->base_addr = link->io.BasePort1; - SET_NETDEV_DEV(dev, &handle_to_dev(link)); + SET_NETDEV_DEV(dev, &link->dev); if (register_netdev(dev)) { printk(KERN_NOTICE "wl3501_cs: register_netdev() failed\n"); goto failed; diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index 3b31bee..8d651a6 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -387,7 +387,7 @@ static int setup_serial(struct pcmcia_device *handle, struct serial_info * info, port.irq = irq; port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | UPF_SHARE_IRQ; port.uartclk = 1843200; - port.dev = &handle_to_dev(handle); + port.dev = &handle->dev; if (buggy_uart) port.flags |= UPF_BUGGY_UART; diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c index 4607742..f87aba6 100644 --- a/drivers/usb/host/sl811_cs.c +++ b/drivers/usb/host/sl811_cs.c @@ -185,7 +185,7 @@ static int sl811_cs_config_check(struct pcmcia_device *p_dev, static int sl811_cs_config(struct pcmcia_device *link) { - struct device *parent = &handle_to_dev(link); + struct device *parent = &link->dev; local_info_t *dev = link->priv; int ret; diff --git a/include/pcmcia/ds.h b/include/pcmcia/ds.h index d6c55fd..d403c12 100644 --- a/include/pcmcia/ds.h +++ b/include/pcmcia/ds.h @@ -138,9 +138,6 @@ struct pcmcia_device { #define to_pcmcia_dev(n) container_of(n, struct pcmcia_device, dev) #define to_pcmcia_drv(n) container_of(n, struct pcmcia_driver, drv) -/* deprecated -- don't use! */ -#define handle_to_dev(handle) (handle->dev) - /* * CIS access. diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c index 64b8599..447aaae 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c @@ -131,7 +131,7 @@ static int snd_pdacf_probe(struct pcmcia_device *link) return err; } - snd_card_set_dev(card, &handle_to_dev(link)); + snd_card_set_dev(card, &link->dev); pdacf->index = i; card_list[i] = card; diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 1492744..5a5db48 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -244,7 +244,7 @@ static int vxpocket_config(struct pcmcia_device *link) if (ret) goto failed; - chip->dev = &handle_to_dev(link); + chip->dev = &link->dev; snd_card_set_dev(chip->card, chip->dev); if (snd_vxpocket_assign_resources(chip, link->io.BasePort1, link->irq.AssignedIRQ) < 0) -- cgit v0.10.2 From 5fa9167a1bf5f5a4b7282f5e7ac56a4a5a1fa044 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 8 Nov 2009 17:24:46 +0100 Subject: pcmcia: rework the irq_req_t typedef Most of the irq_req_t typedef'd struct can be re-worked quite easily: (1) IRQInfo2 was unused in any case, so drop it. (2) IRQInfo1 was used write-only, so drop it. (3) Instance (private data to be passed to the IRQ handler): Most PCMCIA drivers using pcmcia_request_irq() to actually register an IRQ handler set the "dev_id" to the same pointer as the "priv" pointer in struct pcmcia_device. Modify the two exceptions (ipwireless, ibmtr_cs) to also work this waym and set the IRQ handler's "dev_id" to p_dev->priv unconditionally. (4) Handler is to be of type irq_handler_t. (5) Handler != NULL already tells whether an IRQ handler is present. Therefore, we do not need the IRQ_HANDLER_PRESENT flag in irq_req_t.Attributes. CC: netdev@vger.kernel.org CC: linux-bluetooth@vger.kernel.org CC: linux-ide@vger.kernel.org CC: linux-wireless@vger.kernel.org CC: linux-scsi@vger.kernel.org CC: alsa-devel@alsa-project.org CC: Jaroslav Kysela CC: Jiri Kosina CC: Karsten Keil for the Bluetooth parts: Acked-by: Marcel Holtmann Signed-off-by: Dominik Brodowski diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c index 5f94e21..1b392c9 100644 --- a/drivers/ata/pata_pcmcia.c +++ b/drivers/ata/pata_pcmcia.c @@ -268,7 +268,6 @@ static int pcmcia_init_one(struct pcmcia_device *pdev) pdev->io.Attributes2 = IO_DATA_PATH_WIDTH_8; pdev->io.IOAddrLines = 3; pdev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - pdev->irq.IRQInfo1 = IRQ_LEVEL_ID; pdev->conf.Attributes = CONF_ENABLE_IRQ; pdev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index 1e0c4d8..2acdc60 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -867,11 +867,9 @@ static int bluecard_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = bluecard_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/bluetooth/bt3c_cs.c b/drivers/bluetooth/bt3c_cs.c index 9787fda..d814a27 100644 --- a/drivers/bluetooth/bt3c_cs.c +++ b/drivers/bluetooth/bt3c_cs.c @@ -659,11 +659,9 @@ static int bt3c_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = bt3c_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/bluetooth/btuart_cs.c b/drivers/bluetooth/btuart_cs.c index f44d752..d339464 100644 --- a/drivers/bluetooth/btuart_cs.c +++ b/drivers/bluetooth/btuart_cs.c @@ -588,11 +588,9 @@ static int btuart_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = btuart_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/bluetooth/dtl1_cs.c b/drivers/bluetooth/dtl1_cs.c index 7cd8614..4f02a6f 100644 --- a/drivers/bluetooth/dtl1_cs.c +++ b/drivers/bluetooth/dtl1_cs.c @@ -573,11 +573,9 @@ static int dtl1_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = dtl1_interrupt; - link->irq.Instance = info; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/char/pcmcia/ipwireless/hardware.c b/drivers/char/pcmcia/ipwireless/hardware.c index 4c1820c..99cffda 100644 --- a/drivers/char/pcmcia/ipwireless/hardware.c +++ b/drivers/char/pcmcia/ipwireless/hardware.c @@ -1213,12 +1213,12 @@ static irqreturn_t ipwireless_handle_v2_v3_interrupt(int irq, irqreturn_t ipwireless_interrupt(int irq, void *dev_id) { - struct ipw_hardware *hw = dev_id; + struct ipw_dev *ipw = dev_id; - if (hw->hw_version == HW_VERSION_1) - return ipwireless_handle_v1_interrupt(irq, hw); + if (ipw->hardware->hw_version == HW_VERSION_1) + return ipwireless_handle_v1_interrupt(irq, ipw->hardware); else - return ipwireless_handle_v2_v3_interrupt(irq, hw); + return ipwireless_handle_v2_v3_interrupt(irq, ipw->hardware); } static void flush_packets_to_hw(struct ipw_hardware *hw) diff --git a/drivers/char/pcmcia/ipwireless/main.c b/drivers/char/pcmcia/ipwireless/main.c index 082146a..dff24da 100644 --- a/drivers/char/pcmcia/ipwireless/main.c +++ b/drivers/char/pcmcia/ipwireless/main.c @@ -93,8 +93,6 @@ static int ipwireless_probe(struct pcmcia_device *p_dev, p_dev->io.NumPorts1 = cfg->io.win[0].len; p_dev->io.IOAddrLines = 16; - p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1; - /* 0x40 causes it to generate level mode interrupts. */ /* 0x04 enables IREQ pin. */ p_dev->conf.ConfigIndex = cfg->index | 0x44; @@ -197,9 +195,8 @@ static int config_ipwireless(struct ipw_dev *ipw) link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = ipwireless_interrupt; - link->irq.Instance = ipw->hardware; INIT_WORK(&ipw->work_reboot, signalled_reboot_work); @@ -315,7 +312,6 @@ static int ipwireless_attach(struct pcmcia_device *link) ipw->link = link; link->priv = ipw; - link->irq.Instance = ipw; /* Link this device into our device list. */ link->dev_node = &ipw->nodes[0]; diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 09b2590..c31a0d9 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -554,7 +554,6 @@ static int mgslpc_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; @@ -609,9 +608,7 @@ static int mgslpc_config(struct pcmcia_device *link) link->conf.ConfigIndex = 8; link->conf.Present = PRESENT_OPTION; - link->irq.Attributes |= IRQ_HANDLE_PRESENT; link->irq.Handler = mgslpc_isr; - link->irq.Instance = info; ret = pcmcia_request_irq(link, &link->irq); if (ret) diff --git a/drivers/ide/ide-cs.c b/drivers/ide/ide-cs.c index 6cee6c8..dd63963 100644 --- a/drivers/ide/ide-cs.c +++ b/drivers/ide/ide-cs.c @@ -103,7 +103,6 @@ static int ide_probe(struct pcmcia_device *link) link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.IOAddrLines = 3; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/isdn/hardware/avm/avm_cs.c b/drivers/isdn/hardware/avm/avm_cs.c index d388ead..5a6ae64 100644 --- a/drivers/isdn/hardware/avm/avm_cs.c +++ b/drivers/isdn/hardware/avm/avm_cs.c @@ -111,8 +111,6 @@ static int avmcs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; - /* General socket configuration */ p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/isdn/hisax/avma1_cs.c b/drivers/isdn/hisax/avma1_cs.c index 6d963f9..f9bdff3 100644 --- a/drivers/isdn/hisax/avma1_cs.c +++ b/drivers/isdn/hisax/avma1_cs.c @@ -123,8 +123,6 @@ static int avma1cs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; - /* General socket configuration */ p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/isdn/hisax/elsa_cs.c b/drivers/isdn/hisax/elsa_cs.c index cdcd297..a2f709f 100644 --- a/drivers/isdn/hisax/elsa_cs.c +++ b/drivers/isdn/hisax/elsa_cs.c @@ -138,7 +138,6 @@ static int elsa_cs_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID|IRQ_SHARE_ID; link->irq.Handler = NULL; /* diff --git a/drivers/isdn/hisax/sedlbauer_cs.c b/drivers/isdn/hisax/sedlbauer_cs.c index 331716f..af5d393 100644 --- a/drivers/isdn/hisax/sedlbauer_cs.c +++ b/drivers/isdn/hisax/sedlbauer_cs.c @@ -145,7 +145,6 @@ static int sedlbauer_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* diff --git a/drivers/isdn/hisax/teles_cs.c b/drivers/isdn/hisax/teles_cs.c index 7b11c15..ea70539 100644 --- a/drivers/isdn/hisax/teles_cs.c +++ b/drivers/isdn/hisax/teles_cs.c @@ -128,7 +128,6 @@ static int teles_probe(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = IRQ_LEVEL_ID|IRQ_SHARE_ID; link->irq.Handler = NULL; /* diff --git a/drivers/net/pcmcia/3c574_cs.c b/drivers/net/pcmcia/3c574_cs.c index 8b65e18..17a2722 100644 --- a/drivers/net/pcmcia/3c574_cs.c +++ b/drivers/net/pcmcia/3c574_cs.c @@ -283,10 +283,8 @@ static int tc574_probe(struct pcmcia_device *link) spin_lock_init(&lp->window_lock); link->io.NumPorts1 = 32; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &el3_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/drivers/net/pcmcia/3c589_cs.c b/drivers/net/pcmcia/3c589_cs.c index c43c21d..6f8d7e2 100644 --- a/drivers/net/pcmcia/3c589_cs.c +++ b/drivers/net/pcmcia/3c589_cs.c @@ -194,10 +194,8 @@ static int tc589_probe(struct pcmcia_device *link) spin_lock_init(&lp->lock); link->io.NumPorts1 = 16; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &el3_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c index 0552ddd..800597b 100644 --- a/drivers/net/pcmcia/axnet_cs.c +++ b/drivers/net/pcmcia/axnet_cs.c @@ -170,7 +170,6 @@ static int axnet_probe(struct pcmcia_device *link) info->p_dev = link; link->priv = dev; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/net/pcmcia/com20020_cs.c b/drivers/net/pcmcia/com20020_cs.c index 51e9cb0..21d9c9d8 100644 --- a/drivers/net/pcmcia/com20020_cs.c +++ b/drivers/net/pcmcia/com20020_cs.c @@ -164,11 +164,10 @@ static int com20020_probe(struct pcmcia_device *p_dev) p_dev->io.NumPorts1 = 16; p_dev->io.IOAddrLines = 16; p_dev->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->conf.Attributes = CONF_ENABLE_IRQ; p_dev->conf.IntType = INT_MEMORY_AND_IO; - p_dev->irq.Instance = info->dev = dev; + info->dev = dev; p_dev->priv = info; return com20020_config(p_dev); @@ -275,9 +274,8 @@ static int com20020_config(struct pcmcia_device *link) ioaddr = dev->base_addr = link->io.BasePort1; dev_dbg(&link->dev, "got ioaddr %Xh\n", ioaddr); - dev_dbg(&link->dev, "request IRQ %d (%Xh/%Xh)\n", - link->irq.AssignedIRQ, - link->irq.IRQInfo1, link->irq.IRQInfo2); + dev_dbg(&link->dev, "request IRQ %d\n", + link->irq.AssignedIRQ); i = pcmcia_request_irq(link, &link->irq); if (i != 0) { diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 9b5ca37c..6e3e1ce 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -255,10 +255,8 @@ static int fmvj18x_probe(struct pcmcia_device *link) link->io.IOAddrLines = 5; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &fjn_interrupt; - link->irq.Instance = dev; /* General socket configuration */ link->conf.Attributes = CONF_ENABLE_IRQ; @@ -428,7 +426,7 @@ static int fmvj18x_config(struct pcmcia_device *link) if (link->io.NumPorts2 != 0) { link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; ret = mfc_try_io_port(link); if (ret != 0) goto failed; } else if (cardtype == UNGERMANN) { diff --git a/drivers/net/pcmcia/ibmtr_cs.c b/drivers/net/pcmcia/ibmtr_cs.c index 76706e1..37f4a6f 100644 --- a/drivers/net/pcmcia/ibmtr_cs.c +++ b/drivers/net/pcmcia/ibmtr_cs.c @@ -119,6 +119,12 @@ static const struct ethtool_ops netdev_ethtool_ops = { .get_drvinfo = netdev_get_drvinfo, }; +static irqreturn_t ibmtr_interrupt(int irq, void *dev_id) { + ibmtr_dev_t *info = dev_id; + struct net_device *dev = info->dev; + return tok_interrupt(irq, dev); +}; + /*====================================================================== ibmtr_attach() creates an "instance" of the driver, allocating @@ -150,14 +156,13 @@ static int __devinit ibmtr_attach(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 4; link->io.IOAddrLines = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; - link->irq.Handler = &tok_interrupt; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; + link->irq.Handler = ibmtr_interrupt; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; - link->irq.Instance = info->dev = dev; + info->dev = dev; SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops); diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c index 2d0c6f9..dae5ef6 100644 --- a/drivers/net/pcmcia/nmclan_cs.c +++ b/drivers/net/pcmcia/nmclan_cs.c @@ -463,10 +463,8 @@ static int nmclan_probe(struct pcmcia_device *link) link->io.NumPorts1 = 32; link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 5; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; link->irq.Handler = &mace_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/drivers/net/pcmcia/pcnet_cs.c b/drivers/net/pcmcia/pcnet_cs.c index 30baee7..de2d100 100644 --- a/drivers/net/pcmcia/pcnet_cs.c +++ b/drivers/net/pcmcia/pcnet_cs.c @@ -266,7 +266,6 @@ static int pcnet_probe(struct pcmcia_device *link) link->priv = dev; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c index 2e795de..9e0da37 100644 --- a/drivers/net/pcmcia/smc91c92_cs.c +++ b/drivers/net/pcmcia/smc91c92_cs.c @@ -329,10 +329,8 @@ static int smc91c92_probe(struct pcmcia_device *link) link->io.NumPorts1 = 16; link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 4; - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &smc_interrupt; - link->irq.Instance = dev; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -456,7 +454,7 @@ static int mhz_mfc_config(struct pcmcia_device *link) link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; link->io.IOAddrLines = 16; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts2 = 8; @@ -655,7 +653,7 @@ static int osi_config(struct pcmcia_device *link) link->conf.Attributes |= CONF_ENABLE_SPKR; link->conf.Status = CCSR_AUDIO_ENA; link->irq.Attributes = - IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED|IRQ_HANDLE_PRESENT; + IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; link->io.NumPorts1 = 64; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts2 = 8; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index f75ac71..fe504b7 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -556,7 +556,6 @@ xirc2ps_probe(struct pcmcia_device *link) link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; link->irq.Handler = xirc2ps_interrupt; - link->irq.Instance = dev; /* Fill in card specific entries */ dev->netdev_ops = &netdev_ops; @@ -835,8 +834,6 @@ xirc2ps_config(struct pcmcia_device * link) link->io.IOAddrLines =10; link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; - link->irq.Attributes = IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; if (local->modem) { int pass; diff --git a/drivers/net/wireless/airo_cs.c b/drivers/net/wireless/airo_cs.c index 7d3a96f..f6036fb 100644 --- a/drivers/net/wireless/airo_cs.c +++ b/drivers/net/wireless/airo_cs.c @@ -134,7 +134,6 @@ static int airo_probe(struct pcmcia_device *p_dev) /* Interrupt setup */ p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->irq.Handler = NULL; /* diff --git a/drivers/net/wireless/atmel_cs.c b/drivers/net/wireless/atmel_cs.c index 18a58b8..3240791 100644 --- a/drivers/net/wireless/atmel_cs.c +++ b/drivers/net/wireless/atmel_cs.c @@ -143,7 +143,6 @@ static int atmel_probe(struct pcmcia_device *p_dev) /* Interrupt setup */ p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; p_dev->irq.Handler = NULL; /* diff --git a/drivers/net/wireless/b43/pcmcia.c b/drivers/net/wireless/b43/pcmcia.c index 2588358..984174b 100644 --- a/drivers/net/wireless/b43/pcmcia.c +++ b/drivers/net/wireless/b43/pcmcia.c @@ -98,9 +98,7 @@ static int __devinit b43_pcmcia_probe(struct pcmcia_device *dev) goto err_disable; dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - dev->irq.IRQInfo1 = IRQ_LEVEL_ID; dev->irq.Handler = NULL; /* The handler is registered later. */ - dev->irq.Instance = NULL; res = pcmcia_request_irq(dev, &dev->irq); if (res != 0) goto err_disable; diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index 243e912..c9640a3 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -641,11 +641,8 @@ static int prism2_config(struct pcmcia_device *link) * irq structure is initialized. */ if (link->conf.Attributes & CONF_ENABLE_IRQ) { - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | - IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = prism2_interrupt; - link->irq.Instance = dev; ret = pcmcia_request_irq(link, &link->irq); if (ret) goto failed; diff --git a/drivers/net/wireless/libertas/if_cs.c b/drivers/net/wireless/libertas/if_cs.c index f2b1655..b1d8459 100644 --- a/drivers/net/wireless/libertas/if_cs.c +++ b/drivers/net/wireless/libertas/if_cs.c @@ -837,7 +837,6 @@ static int if_cs_probe(struct pcmcia_device *p_dev) p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = NULL; - p_dev->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_LEVEL_ID; p_dev->conf.Attributes = 0; p_dev->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/net/wireless/netwave_cs.c b/drivers/net/wireless/netwave_cs.c index f5333b7..e61e6b9 100644 --- a/drivers/net/wireless/netwave_cs.c +++ b/drivers/net/wireless/netwave_cs.c @@ -384,8 +384,7 @@ static int netwave_probe(struct pcmcia_device *link) link->io.IOAddrLines = 5; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = &netwave_interrupt; /* General socket configuration */ @@ -404,8 +403,6 @@ static int netwave_probe(struct pcmcia_device *link) dev->watchdog_timeo = TX_TIMEOUT; - link->irq.Instance = dev; - return netwave_pcmcia_config( link); } /* netwave_attach */ diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index 688b398..f27bb83 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -120,10 +120,8 @@ orinoco_cs_probe(struct pcmcia_device *link) link->priv = priv; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = orinoco_interrupt; - link->irq.Instance = priv; /* General socket configuration defaults can go here. In this * client, we assume very little, and rely on the CIS for diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c index c609371..59bda24 100644 --- a/drivers/net/wireless/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/orinoco/spectrum_cs.c @@ -194,10 +194,8 @@ spectrum_cs_probe(struct pcmcia_device *link) link->priv = priv; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = orinoco_interrupt; - link->irq.Instance = priv; /* General socket configuration defaults can go here. In this * client, we assume very little, and rely on the CIS for diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 5e0f4c3..91213f9 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -323,8 +323,7 @@ static int ray_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 5; /* Interrupt setup. For PCMCIA, driver takes what's given */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = &ray_interrupt; /* General socket configuration */ @@ -333,7 +332,6 @@ static int ray_probe(struct pcmcia_device *p_dev) p_dev->conf.ConfigIndex = 1; p_dev->priv = dev; - p_dev->irq.Instance = dev; local->finder = p_dev; local->card_status = CARD_INSERTED; diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index df3579a..33918fd 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -4438,8 +4438,7 @@ wavelan_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 3; /* Interrupt setup */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = wavelan_interrupt; /* General socket configuration */ @@ -4451,7 +4450,7 @@ wavelan_probe(struct pcmcia_device *p_dev) if (!dev) return -ENOMEM; - p_dev->priv = p_dev->irq.Instance = dev; + p_dev->priv = dev; lp = netdev_priv(dev); diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 9a956c7..5f0401a 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1898,8 +1898,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev) p_dev->io.IOAddrLines = 5; /* Interrupt setup */ - p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - p_dev->irq.IRQInfo1 = IRQ_LEVEL_ID; + p_dev->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; p_dev->irq.Handler = wl3501_interrupt; /* General socket configuration */ @@ -1922,7 +1921,7 @@ static int wl3501_probe(struct pcmcia_device *p_dev) dev->wireless_handlers = &wl3501_handler_def; SET_ETHTOOL_OPS(dev, &ops); netif_stop_queue(dev); - p_dev->priv = p_dev->irq.Instance = dev; + p_dev->priv = dev; return wl3501_config(p_dev); out_link: diff --git a/drivers/parport/parport_cs.c b/drivers/parport/parport_cs.c index e56a4de..7dd370f 100644 --- a/drivers/parport/parport_cs.c +++ b/drivers/parport/parport_cs.c @@ -106,7 +106,6 @@ static int parport_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.Attributes2 = IO_DATA_PATH_WIDTH_8; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index 1cf7d54..a8bf8c1 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -383,8 +383,8 @@ static int pcmcia_release_irq(struct pcmcia_device *p_dev, irq_req_t *req) s->irq.AssignedIRQ = 0; } - if (req->Attributes & IRQ_HANDLE_PRESENT) { - free_irq(req->AssignedIRQ, req->Instance); + if (req->Handler) { + free_irq(req->AssignedIRQ, p_dev->priv); } #ifdef CONFIG_PCMCIA_PROBE @@ -664,7 +664,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) /* if the underlying IRQ infrastructure allows for it, only allocate * the IRQ, but do not enable it */ - if (!(req->Attributes & IRQ_HANDLE_PRESENT)) + if (!(req->Handler)) type |= IRQ_NOAUTOEN; #endif /* IRQ_NOAUTOEN */ @@ -674,7 +674,7 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) } else { int try; u32 mask = s->irq_mask; - void *data = &p_dev->dev.driver; /* something unique to this device */ + void *data = p_dev; /* something unique to this device */ for (try = 0; try < 64; try++) { irq = try % 32; @@ -691,12 +691,12 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) * registering a dummy handle works, i.e. if the IRQ isn't * marked as used by the kernel resource management core */ ret = request_irq(irq, - (req->Attributes & IRQ_HANDLE_PRESENT) ? req->Handler : test_action, + (req->Handler) ? req->Handler : test_action, type, p_dev->devname, - (req->Attributes & IRQ_HANDLE_PRESENT) ? req->Instance : data); + (req->Handler) ? p_dev->priv : data); if (!ret) { - if (!(req->Attributes & IRQ_HANDLE_PRESENT)) + if (!req->Handler) free_irq(irq, data); break; } @@ -713,9 +713,9 @@ int pcmcia_request_irq(struct pcmcia_device *p_dev, irq_req_t *req) irq = s->pci_irq; } - if (ret && (req->Attributes & IRQ_HANDLE_PRESENT)) { + if (ret && req->Handler) { ret = request_irq(irq, req->Handler, type, - p_dev->devname, req->Instance); + p_dev->devname, p_dev->priv); if (ret) { dev_printk(KERN_INFO, &s->dev, "request_irq() failed\n"); diff --git a/drivers/scsi/pcmcia/aha152x_stub.c b/drivers/scsi/pcmcia/aha152x_stub.c index 4329e4e..528733b 100644 --- a/drivers/scsi/pcmcia/aha152x_stub.c +++ b/drivers/scsi/pcmcia/aha152x_stub.c @@ -106,7 +106,6 @@ static int aha152x_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; diff --git a/drivers/scsi/pcmcia/fdomain_stub.c b/drivers/scsi/pcmcia/fdomain_stub.c index 5792b55..9140406 100644 --- a/drivers/scsi/pcmcia/fdomain_stub.c +++ b/drivers/scsi/pcmcia/fdomain_stub.c @@ -89,7 +89,6 @@ static int fdomain_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index 9dfd6f5..c2341af 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1564,12 +1564,10 @@ static int nsp_cs_probe(struct pcmcia_device *link) link->io.IOAddrLines = 10; /* not used */ /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; /* Interrupt handler */ link->irq.Handler = &nspintr; - link->irq.Instance = info; link->irq.Attributes |= IRQF_SHARED; /* General socket configuration */ diff --git a/drivers/scsi/pcmcia/qlogic_stub.c b/drivers/scsi/pcmcia/qlogic_stub.c index 65d7ad58..f85f094 100644 --- a/drivers/scsi/pcmcia/qlogic_stub.c +++ b/drivers/scsi/pcmcia/qlogic_stub.c @@ -162,7 +162,6 @@ static int qlogic_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.Present = PRESENT_OPTION; diff --git a/drivers/scsi/pcmcia/sym53c500_cs.c b/drivers/scsi/pcmcia/sym53c500_cs.c index 851a41c..e7564d8 100644 --- a/drivers/scsi/pcmcia/sym53c500_cs.c +++ b/drivers/scsi/pcmcia/sym53c500_cs.c @@ -867,7 +867,6 @@ SYM53C500_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.IOAddrLines = 10; link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; diff --git a/drivers/serial/serial_cs.c b/drivers/serial/serial_cs.c index 8d651a6..5d6f947 100644 --- a/drivers/serial/serial_cs.c +++ b/drivers/serial/serial_cs.c @@ -334,7 +334,6 @@ static int serial_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_8; link->io.NumPorts1 = 8; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; if (do_sound) { link->conf.Attributes |= CONF_ENABLE_SPKR; diff --git a/drivers/staging/comedi/drivers/cb_das16_cs.c b/drivers/staging/comedi/drivers/cb_das16_cs.c index 9e75802..39923cb 100644 --- a/drivers/staging/comedi/drivers/cb_das16_cs.c +++ b/drivers/staging/comedi/drivers/cb_das16_cs.c @@ -703,7 +703,6 @@ static int das16cs_pcmcia_attach(struct pcmcia_device *link) /* Initialize the pcmcia_device structure */ /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; diff --git a/drivers/staging/comedi/drivers/das08_cs.c b/drivers/staging/comedi/drivers/das08_cs.c index 384a77a..9b945e5 100644 --- a/drivers/staging/comedi/drivers/das08_cs.c +++ b/drivers/staging/comedi/drivers/das08_cs.c @@ -173,7 +173,6 @@ static int das08_pcmcia_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* diff --git a/drivers/staging/comedi/drivers/ni_daq_700.c b/drivers/staging/comedi/drivers/ni_daq_700.c index 7328a84..ef5e118 100644 --- a/drivers/staging/comedi/drivers/ni_daq_700.c +++ b/drivers/staging/comedi/drivers/ni_daq_700.c @@ -503,7 +503,6 @@ static int dio700_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* diff --git a/drivers/staging/comedi/drivers/ni_daq_dio24.c b/drivers/staging/comedi/drivers/ni_daq_dio24.c index 5056315..9017be3 100644 --- a/drivers/staging/comedi/drivers/ni_daq_dio24.c +++ b/drivers/staging/comedi/drivers/ni_daq_dio24.c @@ -254,7 +254,6 @@ static int dio24_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = NULL; /* diff --git a/drivers/staging/comedi/drivers/ni_labpc_cs.c b/drivers/staging/comedi/drivers/ni_labpc_cs.c index 7fb5058..7d514b3 100644 --- a/drivers/staging/comedi/drivers/ni_labpc_cs.c +++ b/drivers/staging/comedi/drivers/ni_labpc_cs.c @@ -230,7 +230,6 @@ static int labpc_cs_attach(struct pcmcia_device *link) /* Interrupt setup */ link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_FORCED_PULSE; - link->irq.IRQInfo1 = IRQ_INFO2_VALID | IRQ_PULSE_ID; link->irq.Handler = NULL; /* diff --git a/drivers/staging/comedi/drivers/ni_mio_cs.c b/drivers/staging/comedi/drivers/ni_mio_cs.c index ca7ab4a..d692f4b 100644 --- a/drivers/staging/comedi/drivers/ni_mio_cs.c +++ b/drivers/staging/comedi/drivers/ni_mio_cs.c @@ -274,7 +274,6 @@ static int cs_attach(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_16; link->io.NumPorts1 = 16; link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; @@ -325,9 +324,6 @@ static int mio_pcmcia_config_loop(struct pcmcia_device *p_dev, p_dev->io.IOAddrLines = cfg->io.flags & CISTPL_IO_LINES_MASK; p_dev->io.NumPorts2 = 0; - p_dev->irq.IRQInfo1 = cfg->irq.IRQInfo1; - p_dev->irq.IRQInfo2 = cfg->irq.IRQInfo2; - for (base = 0x000; base < 0x400; base += 0x20) { p_dev->io.BasePort1 = base; ret = pcmcia_request_io(p_dev, &p_dev->io); diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c index 48e7c27..5256fd9 100644 --- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c +++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c @@ -1041,10 +1041,8 @@ static int daqp_cs_attach(struct pcmcia_device *link) link->priv = local; /* Interrupt setup */ - link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING | IRQ_HANDLE_PRESENT; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; + link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING; link->irq.Handler = daqp_interrupt; - link->irq.Instance = local; /* General socket configuration defaults can go here. In this diff --git a/drivers/usb/host/sl811_cs.c b/drivers/usb/host/sl811_cs.c index f87aba6..39d253e 100644 --- a/drivers/usb/host/sl811_cs.c +++ b/drivers/usb/host/sl811_cs.c @@ -243,7 +243,6 @@ static int sl811_cs_probe(struct pcmcia_device *link) /* Initialize */ link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_INFO2_VALID|IRQ_LEVEL_ID; link->irq.Handler = NULL; link->conf.Attributes = 0; diff --git a/include/pcmcia/cs.h b/include/pcmcia/cs.h index 904468a..afc2bfb 100644 --- a/include/pcmcia/cs.h +++ b/include/pcmcia/cs.h @@ -15,6 +15,10 @@ #ifndef _LINUX_CS_H #define _LINUX_CS_H +#ifdef __KERNEL__ +#include +#endif + /* For AccessConfigurationRegister */ typedef struct conf_reg_t { u_char Function; @@ -111,11 +115,9 @@ typedef struct io_req_t { /* For RequestIRQ and ReleaseIRQ */ typedef struct irq_req_t { - u_int Attributes; - u_int AssignedIRQ; - u_int IRQInfo1, IRQInfo2; /* IRQInfo2 is ignored */ - void *Handler; - void *Instance; + u_int Attributes; + u_int AssignedIRQ; + irq_handler_t Handler; } irq_req_t; /* Attributes for RequestIRQ and ReleaseIRQ */ @@ -125,7 +127,7 @@ typedef struct irq_req_t { #define IRQ_TYPE_DYNAMIC_SHARING 0x02 #define IRQ_FORCED_PULSE 0x04 #define IRQ_FIRST_SHARED 0x08 -#define IRQ_HANDLE_PRESENT 0x10 +//#define IRQ_HANDLE_PRESENT 0x10 #define IRQ_PULSE_ALLOCATED 0x100 /* Bits in IRQInfo1 field */ diff --git a/sound/pcmcia/pdaudiocf/pdaudiocf.c b/sound/pcmcia/pdaudiocf/pdaudiocf.c index 447aaae..7717e01 100644 --- a/sound/pcmcia/pdaudiocf/pdaudiocf.c +++ b/sound/pcmcia/pdaudiocf/pdaudiocf.c @@ -142,12 +142,10 @@ static int snd_pdacf_probe(struct pcmcia_device *link) link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.NumPorts1 = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT | IRQ_FORCED_PULSE; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_FORCED_PULSE; // link->irq.Attributes = IRQ_TYPE_DYNAMIC_SHARING|IRQ_FIRST_SHARED; - link->irq.IRQInfo1 = 0 /* | IRQ_LEVEL_ID */; link->irq.Handler = pdacf_interrupt; - link->irq.Instance = pdacf; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; link->conf.ConfigIndex = 1; diff --git a/sound/pcmcia/vx/vxpocket.c b/sound/pcmcia/vx/vxpocket.c index 5a5db48..7be3b33 100644 --- a/sound/pcmcia/vx/vxpocket.c +++ b/sound/pcmcia/vx/vxpocket.c @@ -161,11 +161,9 @@ static int snd_vxpocket_new(struct snd_card *card, int ibl, link->io.Attributes1 = IO_DATA_PATH_WIDTH_AUTO; link->io.NumPorts1 = 16; - link->irq.Attributes = IRQ_TYPE_EXCLUSIVE | IRQ_HANDLE_PRESENT; + link->irq.Attributes = IRQ_TYPE_EXCLUSIVE; - link->irq.IRQInfo1 = IRQ_LEVEL_ID; link->irq.Handler = &snd_vx_irq_handler; - link->irq.Instance = chip; link->conf.Attributes = CONF_ENABLE_IRQ; link->conf.IntType = INT_MEMORY_AND_IO; -- cgit v0.10.2 From f8be4231f82ab56a87ce74906671afbe1aa9ec75 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 30 Nov 2009 01:18:46 -0600 Subject: perf trace/scripting: Silence PERL_EMBED_* backtick errors The backtick shell substitutions for PERL_EMBED_LDOPT/CCOPT make a lot of noise on stderr if Embed.pm isn't installed - this silences them. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259565529-6407-2-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 8ad57b5..d62a2d7 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -490,8 +490,8 @@ else LIB_OBJS += util/probe-finder.o endif -PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts` -PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts` +PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` +PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { perl_alloc(); return 0; }') | $(CC) -x c - $(PERL_EMBED_CCOPTS) -o /dev/null $(PERL_EMBED_LDOPTS) > /dev/null 2>&1 && echo y"), y) BASIC_CFLAGS += -DNO_LIBPERL -- cgit v0.10.2 From e136323c5a8a7d91d17c5b7b340758bb9dd33739 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 30 Nov 2009 01:18:47 -0600 Subject: perf trace/scripting: Ignore shadowed variable warning for perf-trace-perl.c The debugging versions of the ENTER and LEAVE internal perl macros, used when embedding perl, define a local block with a my_perl perl variable that shadows a global variable of the same name, which is also the name expected by the embedding API for the embedded interpreter. Since we don't have control over the code generated in this case (it's an externality) and can't get rid of the warning, ignore it. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259565529-6407-3-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index d62a2d7..20cd663 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -872,7 +872,7 @@ util/find_next_bit.o: ../../lib/find_next_bit.c PERF-CFLAGS $(QUIET_CC)$(CC) -o util/find_next_bit.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< util/trace-event-perl.o: util/trace-event-perl.c PERF-CFLAGS - $(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter $< + $(QUIET_CC)$(CC) -o util/trace-event-perl.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c PERF-CFLAGS $(QUIET_CC)$(CC) -o scripts/perl/Perf-Trace-Util/Context.o -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< -- cgit v0.10.2 From 61381de0504181368672a83d2e14c38dbaf3c136 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 30 Nov 2009 01:18:48 -0600 Subject: perf trace/scripting: Fix Perl common_* access functions The common_* functions (e.g. common_pc(), etc) are exported as common_* but named get_common_*, resulting in unresolved subroutine errors when executing scripts. Make the internal and external names match. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259565529-6407-4-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c index 3ba3ffc..af78d9a 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c @@ -39,8 +39,8 @@ #line 41 "Context.c" -XS(XS_Perf__Trace__Context_get_common_pc); /* prototype to pass -Wmissing-prototypes */ -XS(XS_Perf__Trace__Context_get_common_pc) +XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_common_pc) { #ifdef dVAR dVAR; dXSARGS; @@ -48,22 +48,22 @@ XS(XS_Perf__Trace__Context_get_common_pc) dXSARGS; #endif if (items != 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::get_common_pc", "context"); + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_pc", "context"); PERL_UNUSED_VAR(cv); /* -W */ { struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); int RETVAL; dXSTARG; - RETVAL = get_common_pc(context); + RETVAL = common_pc(context); XSprePUSH; PUSHi((IV)RETVAL); } XSRETURN(1); } -XS(XS_Perf__Trace__Context_get_common_flags); /* prototype to pass -Wmissing-prototypes */ -XS(XS_Perf__Trace__Context_get_common_flags) +XS(XS_Perf__Trace__Context_common_flags); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_common_flags) { #ifdef dVAR dVAR; dXSARGS; @@ -71,22 +71,22 @@ XS(XS_Perf__Trace__Context_get_common_flags) dXSARGS; #endif if (items != 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::get_common_flags", "context"); + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_flags", "context"); PERL_UNUSED_VAR(cv); /* -W */ { struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); int RETVAL; dXSTARG; - RETVAL = get_common_flags(context); + RETVAL = common_flags(context); XSprePUSH; PUSHi((IV)RETVAL); } XSRETURN(1); } -XS(XS_Perf__Trace__Context_get_common_lock_depth); /* prototype to pass -Wmissing-prototypes */ -XS(XS_Perf__Trace__Context_get_common_lock_depth) +XS(XS_Perf__Trace__Context_common_lock_depth); /* prototype to pass -Wmissing-prototypes */ +XS(XS_Perf__Trace__Context_common_lock_depth) { #ifdef dVAR dVAR; dXSARGS; @@ -94,14 +94,14 @@ XS(XS_Perf__Trace__Context_get_common_lock_depth) dXSARGS; #endif if (items != 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::get_common_lock_depth", "context"); + Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_lock_depth", "context"); PERL_UNUSED_VAR(cv); /* -W */ { struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0))); int RETVAL; dXSTARG; - RETVAL = get_common_lock_depth(context); + RETVAL = common_lock_depth(context); XSprePUSH; PUSHi((IV)RETVAL); } XSRETURN(1); @@ -124,9 +124,9 @@ XS(boot_Perf__Trace__Context) PERL_UNUSED_VAR(items); /* -W */ XS_VERSION_BOOTCHECK ; - newXSproto("Perf::Trace::Context::get_common_pc", XS_Perf__Trace__Context_get_common_pc, file, "$"); - newXSproto("Perf::Trace::Context::get_common_flags", XS_Perf__Trace__Context_get_common_flags, file, "$"); - newXSproto("Perf::Trace::Context::get_common_lock_depth", XS_Perf__Trace__Context_get_common_lock_depth, file, "$"); + newXSproto("Perf::Trace::Context::common_pc", XS_Perf__Trace__Context_common_pc, file, "$"); + newXSproto("Perf::Trace::Context::common_flags", XS_Perf__Trace__Context_common_flags, file, "$"); + newXSproto("Perf::Trace::Context::common_lock_depth", XS_Perf__Trace__Context_common_lock_depth, file, "$"); if (PL_unitcheckav) call_list(PL_scopestack_ix, PL_unitcheckav); XSRETURN_YES; diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs index 24facb3..fb78006 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs @@ -28,14 +28,14 @@ MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context PROTOTYPES: ENABLE int -get_common_pc(context) +common_pc(context) struct scripting_context * context int -get_common_flags(context) +common_flags(context) struct scripting_context * context int -get_common_lock_depth(context) +common_lock_depth(context) struct scripting_context * context diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c index d179ade..2e1cc3c 100644 --- a/tools/perf/util/trace-event-perl.c +++ b/tools/perf/util/trace-event-perl.c @@ -242,7 +242,7 @@ static inline struct event *find_cache_event(int type) return event; } -int get_common_pc(struct scripting_context *context) +int common_pc(struct scripting_context *context) { int pc; @@ -251,7 +251,7 @@ int get_common_pc(struct scripting_context *context) return pc; } -int get_common_flags(struct scripting_context *context) +int common_flags(struct scripting_context *context) { int flags; @@ -260,7 +260,7 @@ int get_common_flags(struct scripting_context *context) return flags; } -int get_common_lock_depth(struct scripting_context *context) +int common_lock_depth(struct scripting_context *context) { int lock_depth; diff --git a/tools/perf/util/trace-event-perl.h b/tools/perf/util/trace-event-perl.h index 666a864..8fe0d86 100644 --- a/tools/perf/util/trace-event-perl.h +++ b/tools/perf/util/trace-event-perl.h @@ -44,8 +44,8 @@ struct scripting_context { void *event_data; }; -int get_common_pc(struct scripting_context *context); -int get_common_flags(struct scripting_context *context); -int get_common_lock_depth(struct scripting_context *context); +int common_pc(struct scripting_context *context); +int common_flags(struct scripting_context *context); +int common_lock_depth(struct scripting_context *context); #endif /* __PERF_TRACE_EVENT_PERL_H */ -- cgit v0.10.2 From 8ea339adc0a48236008e59dd21564d71c37b331c Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Mon, 30 Nov 2009 01:18:49 -0600 Subject: perf trace/scripting: Add Fedora libperl install note to doc Fedora needs perl-ExtUtils-Embed for Perl scripting, which also brings along libperl-devel; note this info for the convenience of Fedora users. Signed-off-by: Tom Zanussi Cc: fweisbec@gmail.com Cc: rostedt@goodmis.org Cc: anton@samba.org Cc: hch@infradead.org LKML-Reference: <1259565529-6407-5-git-send-email-tzanussi@gmail.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README index adb99aa..9a97076 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/README +++ b/tools/perf/scripts/perl/Perf-Trace-Util/README @@ -36,8 +36,8 @@ INSTALLATION Building perf with perf trace Perl scripting should install this module in the right place. -You should make sure libperl is installed first e.g. apt-get install -libperl-dev. +You should make sure libperl and ExtUtils/Embed.pm are installed first +e.g. apt-get install libperl-dev or yum install perl-ExtUtils-Embed. DEPENDENCIES diff --git a/tools/perf/util/trace-event-perl.c b/tools/perf/util/trace-event-perl.c index 2e1cc3c..51e833f 100644 --- a/tools/perf/util/trace-event-perl.c +++ b/tools/perf/util/trace-event-perl.c @@ -577,7 +577,9 @@ struct scripting_ops perl_scripting_ops = { void setup_perl_scripting(void) { fprintf(stderr, "Perl scripting not supported." - " Install libperl-dev[el] and rebuild perf to get it.\n"); + " Install libperl and rebuild perf to enable it. e.g. " + "apt-get install libperl-dev (ubuntu), yum install " + "perl-ExtUtils-Embed (Fedora), etc.\n"); } #else void setup_perl_scripting(void) -- cgit v0.10.2 From 9eaa192d8988d621217a9e6071cd403fd6010496 Mon Sep 17 00:00:00 2001 From: "Helight.Xu" Date: Mon, 30 Nov 2009 18:33:51 +0800 Subject: x86: Fix a section mismatch in arch/x86/kernel/setup.c copy_edd() should be __init. warning msg: WARNING: vmlinux.o(.text+0x7759): Section mismatch in reference from the function copy_edd() to the variable .init.data:boot_params The function copy_edd() references the variable __initdata boot_params. This is often because copy_edd lacks a __initdata annotation or the annotation of boot_params is wrong. Signed-off-by: ZhenwenXu LKML-Reference: <4B139F8F.4000907@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e09f0e2..e020b2d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -247,7 +247,7 @@ EXPORT_SYMBOL(edd); * from boot_params into a safe place. * */ -static inline void copy_edd(void) +static inline void __init copy_edd(void) { memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, sizeof(edd.mbr_signature)); @@ -256,7 +256,7 @@ static inline void copy_edd(void) edd.edd_info_nr = boot_params.eddbuf_entries; } #else -static inline void copy_edd(void) +static inline void __init copy_edd(void) { } #endif -- cgit v0.10.2 From 04d8a9db89f00dee78d792d094dc573784ead643 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 30 Nov 2009 15:37:25 -0500 Subject: arch/alpha/kernel/sys_ruffian.c: Use DIV_ROUND_CLOSEST The kernel.h macro DIV_ROUND_CLOSEST performs the computation (x + d/2)/d but is perhaps more readable. The semantic patch that makes this change is as follows: (http://www.emn.fr/x-info/coccinelle/) // @haskernel@ @@ @depends on haskernel@ expression x,__divisor; @@ - (((x) + ((__divisor) / 2)) / (__divisor)) + DIV_ROUND_CLOSEST(x,__divisor) // Signed-off-by: Julia Lawall Cc: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Matt Turner diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index d9f9cfe..8de1046 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -66,7 +66,7 @@ ruffian_init_irq(void) common_init_isa_dma(); } -#define RUFFIAN_LATCH ((PIT_TICK_RATE + HZ / 2) / HZ) +#define RUFFIAN_LATCH DIV_ROUND_CLOSEST(PIT_TICK_RATE, HZ) static void __init ruffian_init_rtc(void) -- cgit v0.10.2 From cc9a2c8301683f73b7e0d1fc2cb5159110f3469f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 30 Nov 2009 15:38:19 -0500 Subject: arch/alpha/kernel: Add kmalloc NULL tests Check that the result of kmalloc is not NULL before passing it to other functions. The semantic match that finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ expression *x; identifier f; constant char *C; @@ x = \(kmalloc\|kcalloc\|kzalloc\)(...); ... when != x == NULL when != x != NULL when != (x || ...) ( kfree(x) f(...,C,...,x,...) | *f(...,x,...) | *x->f ) // Signed-off-by: Julia Lawall Cc: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Matt Turner diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 8e059e5..53dd2f1 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -1103,6 +1103,8 @@ marvel_agp_info(void) * Allocate the info structure. */ agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; /* * Fill it in. diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c index 7668649..219bf27 100644 --- a/arch/alpha/kernel/core_titan.c +++ b/arch/alpha/kernel/core_titan.c @@ -757,6 +757,8 @@ titan_agp_info(void) * Allocate the info structure. */ agp = kmalloc(sizeof(*agp), GFP_KERNEL); + if (!agp) + return NULL; /* * Fill it in. -- cgit v0.10.2 From 8627b96dd80dca440d91fbb1ec733be25912d0dd Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 18 Nov 2009 14:12:58 +0000 Subject: tty_port: handle the nonblocking open of a dead port corner case Some drivers allow O_NDELAY of a dead port (eg for setserial to work). In that situation we must not try to raise the carrier. Signed-off-by: Alan Cox Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c index 2e8552d..c63f3d3 100644 --- a/drivers/char/tty_port.c +++ b/drivers/char/tty_port.c @@ -219,8 +219,11 @@ int tty_port_block_til_ready(struct tty_port *port, /* if non-blocking mode is set we can pass directly to open unless the port has just hung up or is in another error state */ - if ((filp->f_flags & O_NONBLOCK) || - (tty->flags & (1 << TTY_IO_ERROR))) { + if (tty->flags & (1 << TTY_IO_ERROR)) { + port->flags |= ASYNC_NORMAL_ACTIVE; + return 0; + } + if (filp->f_flags & O_NONBLOCK) { /* Indicate we are open */ if (tty->termios->c_cflag & CBAUD) tty_port_raise_dtr_rts(port); -- cgit v0.10.2 From b037179f7a4fff7bd8279b0568a7dc663ebc9d15 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Sun, 15 Nov 2009 05:42:18 +0100 Subject: bcm63xx_uart: Fix serial driver compile breakage. The driver missed a small API change while sitting in Ralf's tree, this patch makes it compile again. Signed-off-by: Maxime Bizon Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/serial/bcm63xx_uart.c b/drivers/serial/bcm63xx_uart.c index beddaa6..37ad0c4 100644 --- a/drivers/serial/bcm63xx_uart.c +++ b/drivers/serial/bcm63xx_uart.c @@ -242,7 +242,7 @@ static void bcm_uart_do_rx(struct uart_port *port) * higher than fifo size anyway since we're much faster than * serial port */ max_count = 32; - tty = port->info->port.tty; + tty = port->state->port.tty; do { unsigned int iestat, c, cstat; char flag; @@ -318,7 +318,7 @@ static void bcm_uart_do_tx(struct uart_port *port) return; } - xmit = &port->info->xmit; + xmit = &port->state->xmit; if (uart_circ_empty(xmit)) goto txq_empty; -- cgit v0.10.2 From 16173c7c2d79da7eb89b41acfdebd74b130f4339 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 24 Nov 2009 10:22:41 +0000 Subject: tty/of_serial: add missing ns16550a id Many boards have a bug-free ns16550 compatible serial port, which we should register as PORT_16550A. This introduces a new value "ns16550a" for the compatible property of of_serial to let a firmware choose that model instead of using the crippled PORT_16550 mode. Reported-by: Alon Ziv Signed-off-by: Michal Simek Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c index 02406ba..cdf172e 100644 --- a/drivers/serial/of_serial.c +++ b/drivers/serial/of_serial.c @@ -161,6 +161,7 @@ static int of_platform_serial_remove(struct of_device *ofdev) static struct of_device_id __devinitdata of_platform_serial_table[] = { { .type = "serial", .compatible = "ns8250", .data = (void *)PORT_8250, }, { .type = "serial", .compatible = "ns16450", .data = (void *)PORT_16450, }, + { .type = "serial", .compatible = "ns16550a", .data = (void *)PORT_16550A, }, { .type = "serial", .compatible = "ns16550", .data = (void *)PORT_16550, }, { .type = "serial", .compatible = "ns16750", .data = (void *)PORT_16750, }, { .type = "serial", .compatible = "ns16850", .data = (void *)PORT_16850, }, -- cgit v0.10.2 From 8c960e49d8beaca7c5b3967cede225bbba36bf43 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 11 Nov 2009 16:57:03 -0500 Subject: Staging: hv: Fix argument order in incorrect memset invocations in hyperv driver. Nearly every invocation of memset in drivers/staging/hv/StorVsc.c has its arguments the wrong way around. Signed-off-by: Dave Jones Cc: Hank Janssen Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/hv/StorVsc.c b/drivers/staging/hv/StorVsc.c index 14015c9..2f7c425 100644 --- a/drivers/staging/hv/StorVsc.c +++ b/drivers/staging/hv/StorVsc.c @@ -196,7 +196,7 @@ static int StorVscChannelInit(struct hv_device *Device) * Now, initiate the vsc/vsp initialization protocol on the open * channel */ - memset(request, sizeof(struct storvsc_request_extension), 0); + memset(request, 0, sizeof(struct storvsc_request_extension)); request->WaitEvent = osd_WaitEventCreate(); vstorPacket->Operation = VStorOperationBeginInitialization; @@ -233,7 +233,7 @@ static int StorVscChannelInit(struct hv_device *Device) DPRINT_INFO(STORVSC, "QUERY_PROTOCOL_VERSION_OPERATION..."); /* reuse the packet for version range supported */ - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationQueryProtocolVersion; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; @@ -266,7 +266,7 @@ static int StorVscChannelInit(struct hv_device *Device) /* Query channel properties */ DPRINT_INFO(STORVSC, "QUERY_PROPERTIES_OPERATION..."); - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationQueryProperties; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; vstorPacket->StorageChannelProperties.PortNumber = @@ -305,7 +305,7 @@ static int StorVscChannelInit(struct hv_device *Device) DPRINT_INFO(STORVSC, "END_INITIALIZATION_OPERATION..."); - memset(vstorPacket, sizeof(struct vstor_packet), 0); + memset(vstorPacket, 0, sizeof(struct vstor_packet)); vstorPacket->Operation = VStorOperationEndInitialization; vstorPacket->Flags = REQUEST_COMPLETION_FLAG; @@ -508,7 +508,7 @@ static int StorVscConnectToVsp(struct hv_device *Device) int ret; storDriver = (struct storvsc_driver_object *)Device->Driver; - memset(&props, sizeof(struct vmstorage_channel_properties), 0); + memset(&props, 0, sizeof(struct vmstorage_channel_properties)); /* Open the channel */ ret = Device->Driver->VmbusChannelInterface.Open(Device, -- cgit v0.10.2 From 5996b3ddc422a16d53b8acf4980d0d6e8b2bf1ed Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Fri, 20 Nov 2009 16:29:17 +0000 Subject: Staging: hv: Fix vmbus event handler bug The flag ENABLE_POLLING is always enabled in original Makefile, but accidently removed during porting to mainline kernel. The patch fixes this bug which can cause stalled network communication. Credit needs to go to Eric Sesterhenn For pointing out a typo in the original code as well. Signed-off-by: Hank Janssen Signed-off-by: Haiyang Zhang Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/hv/Channel.c b/drivers/staging/hv/Channel.c index d649ee1..746370e 100644 --- a/drivers/staging/hv/Channel.c +++ b/drivers/staging/hv/Channel.c @@ -611,7 +611,7 @@ void VmbusChannelClose(struct vmbus_channel *Channel) /* Stop callback and cancel the timer asap */ Channel->OnChannelCallback = NULL; - del_timer(&Channel->poll_timer); + del_timer_sync(&Channel->poll_timer); /* Send a closing message */ info = kmalloc(sizeof(*info) + @@ -978,14 +978,10 @@ void VmbusChannelOnChannelEvent(struct vmbus_channel *Channel) { DumpVmbusChannel(Channel); ASSERT(Channel->OnChannelCallback); -#ifdef ENABLE_POLLING - del_timer(&Channel->poll_timer); - Channel->OnChannelCallback(Channel->ChannelCallbackContext); - channel->poll_timer.expires(jiffies + usecs_to_jiffies(100); - add_timer(&channel->poll_timer); -#else + Channel->OnChannelCallback(Channel->ChannelCallbackContext); -#endif + + mod_timer(&Channel->poll_timer, jiffies + usecs_to_jiffies(100)); } /** @@ -997,10 +993,6 @@ void VmbusChannelOnTimer(unsigned long data) if (channel->OnChannelCallback) { channel->OnChannelCallback(channel->ChannelCallbackContext); -#ifdef ENABLE_POLLING - channel->poll_timer.expires(jiffies + usecs_to_jiffies(100); - add_timer(&channel->poll_timer); -#endif } } diff --git a/drivers/staging/hv/ChannelMgmt.c b/drivers/staging/hv/ChannelMgmt.c index 3db62ca..ef38467 100644 --- a/drivers/staging/hv/ChannelMgmt.c +++ b/drivers/staging/hv/ChannelMgmt.c @@ -119,7 +119,7 @@ static inline void ReleaseVmbusChannel(void *context) */ void FreeVmbusChannel(struct vmbus_channel *Channel) { - del_timer(&Channel->poll_timer); + del_timer_sync(&Channel->poll_timer); /* * We have to release the channel's workqueue/thread in the vmbus's -- cgit v0.10.2 From d0e94d17ed8590d53252212414a627125825b379 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 23 Nov 2009 17:00:22 +0000 Subject: Staging: hv: Fix some missing author names Fix some missing author names. They were accidentally removed by someone within Microsoft before the files were sent for inclusion in the kernel. Signed-off-by: Hank Janssen Signed-off-by: Haiyang Zhang Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/hv/BlkVsc.c b/drivers/staging/hv/BlkVsc.c index 51aa861..a48ee3a 100644 --- a/drivers/staging/hv/BlkVsc.c +++ b/drivers/staging/hv/BlkVsc.c @@ -16,6 +16,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen * */ diff --git a/drivers/staging/hv/NetVsc.c b/drivers/staging/hv/NetVsc.c index d384c0d..1c717f9 100644 --- a/drivers/staging/hv/NetVsc.c +++ b/drivers/staging/hv/NetVsc.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen */ #include diff --git a/drivers/staging/hv/NetVsc.h b/drivers/staging/hv/NetVsc.h index 3e7112f..6e0e034 100644 --- a/drivers/staging/hv/NetVsc.h +++ b/drivers/staging/hv/NetVsc.h @@ -16,6 +16,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen * */ diff --git a/drivers/staging/hv/blkvsc_drv.c b/drivers/staging/hv/blkvsc_drv.c index 99c4926..62b2828 100644 --- a/drivers/staging/hv/blkvsc_drv.c +++ b/drivers/staging/hv/blkvsc_drv.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen */ #include diff --git a/drivers/staging/hv/netvsc_drv.c b/drivers/staging/hv/netvsc_drv.c index 3192d50..0d7459e 100644 --- a/drivers/staging/hv/netvsc_drv.c +++ b/drivers/staging/hv/netvsc_drv.c @@ -15,6 +15,7 @@ * Place - Suite 330, Boston, MA 02111-1307 USA. * * Authors: + * Haiyang Zhang * Hank Janssen */ #include -- cgit v0.10.2 From fb08808ca252a76519d3b17fc3b6cfc6b2806e55 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 21 Oct 2009 14:42:11 +0200 Subject: Staging: update TODO files Remove my mail address. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/staging/rtl8187se/TODO b/drivers/staging/rtl8187se/TODO index c09a916..a762e79 100644 --- a/drivers/staging/rtl8187se/TODO +++ b/drivers/staging/rtl8187se/TODO @@ -11,5 +11,4 @@ TODO: - sparse fixes - integrate with drivers/net/wireless/rtl818x -Please send any patches to Greg Kroah-Hartman and -Bartlomiej Zolnierkiewicz . +Please send any patches to Greg Kroah-Hartman . diff --git a/drivers/staging/rtl8192su/TODO b/drivers/staging/rtl8192su/TODO index b13be9e..f11eec7 100644 --- a/drivers/staging/rtl8192su/TODO +++ b/drivers/staging/rtl8192su/TODO @@ -14,5 +14,4 @@ TODO: - sparse fixes - integrate with drivers/net/wireless/rtl818x -Please send any patches to Greg Kroah-Hartman and -Bartlomiej Zolnierkiewicz . +Please send any patches to Greg Kroah-Hartman . diff --git a/drivers/staging/vt6655/TODO b/drivers/staging/vt6655/TODO index 8462cd1..cb04aaa 100644 --- a/drivers/staging/vt6655/TODO +++ b/drivers/staging/vt6655/TODO @@ -16,6 +16,5 @@ TODO: - sparse fixes - integrate with drivers/net/wireless -Please send any patches to Greg Kroah-Hartman , -Forest Bond and Bartlomiej Zolnierkiewicz -. +Please send any patches to Greg Kroah-Hartman +and Forest Bond . diff --git a/drivers/staging/vt6656/TODO b/drivers/staging/vt6656/TODO index 17cf50c..a318995 100644 --- a/drivers/staging/vt6656/TODO +++ b/drivers/staging/vt6656/TODO @@ -15,6 +15,5 @@ TODO: - sparse fixes - integrate with drivers/net/wireless -Please send any patches to Greg Kroah-Hartman , -Forest Bond and Bartlomiej Zolnierkiewicz -. +Please send any patches to Greg Kroah-Hartman +and Forest Bond . -- cgit v0.10.2 From c2f6595fbdb408d3d6850cfae590c8fa93e27399 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 18 Nov 2009 11:37:15 -0500 Subject: USB: EHCI: don't send Clear-TT-Buffer following a STALL This patch (as1304) fixes a regression in ehci-hcd. Evidently some hubs don't handle Clear-TT-Buffer requests correctly, so we should avoid sending them when they don't appear to be absolutely necessary. The reported symptom is that output on a downstream audio device cuts out because the hub stops relaying isochronous packets. The patch prevents Clear-TT-Buffer requests from being sent following a STALL handshake. In theory a STALL indicates either that the downstream device sent a STALL or that no matching TT buffer could be found. In either case, the transfer is completed and the TT buffer does not remain busy, so it doesn't need to be cleared. Also, the patch fixes a minor flaw in the code that actually sends the Clear-TT-Buffer requests. Although the pipe direction isn't really used for control transfers, it should be a Send rather than a Receive. Signed-off-by: Alan Stern Reported-by: Javier Kohen CC: David Brownell Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 5ce8391..0f857e6 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -444,7 +444,7 @@ resubmit: static inline int hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt) { - return usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), + return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); } diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 00ad9ce..139a2cc 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -487,8 +487,20 @@ halt: * we must clear the TT buffer (11.17.5). */ if (unlikely(last_status != -EINPROGRESS && - last_status != -EREMOTEIO)) - ehci_clear_tt_buffer(ehci, qh, urb, token); + last_status != -EREMOTEIO)) { + /* The TT's in some hubs malfunction when they + * receive this request following a STALL (they + * stop sending isochronous packets). Since a + * STALL can't leave the TT buffer in a busy + * state (if you believe Figures 11-48 - 11-51 + * in the USB 2.0 spec), we won't clear the TT + * buffer in this case. Strictly speaking this + * is a violation of the spec. + */ + if (last_status != -EPIPE) + ehci_clear_tt_buffer(ehci, qh, urb, + token); + } } /* if we're removing something not at the queue head, -- cgit v0.10.2 From cea83241b3a84499c4f9b12f8288f787e7aa6383 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Wed, 18 Nov 2009 22:51:18 +0300 Subject: USB: musb_gadget: fix STALL handling The driver incorrectly cancels the mass-storage device CSW request (which leads to device reset) due to giving back URB at the head of endpoint's queue after sending each STALL handshake; stop doing that and start checking for the queue being non-empty before stalling an endpoint and disallowing stall in such case in musb_gadget_set_halt() like the other gadget drivers do. Moreover, the driver starts Rx request despite of the endpoint being halted -- fix this by moving the SendStall bit check from musb_g_rx() to rxstate(). And we also sometimes get into rxstate() with DMA still active after clearing an endpoint's halt (not clear why), so bail out in this case, similarly to what txstate() does... While at it, also do the following changes : - in musb_gadget_set_halt(), remove pointless Tx FIFO flushing (the driver does not allow stalling with non-empty Tx FIFO anyway); - in rxstate(), stop pointlessly zeroing the 'csr' variable; - in musb_gadget_set_halt(), move the 'done' label to a more proper place; - in musb_g_rx(), eliminate the 'done' label completely... Signed-off-by: Sergei Shtylyov Cc: Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 8b3c4e2..74073f9 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -4,6 +4,7 @@ * Copyright 2005 Mentor Graphics Corporation * Copyright (C) 2005-2006 by Texas Instruments * Copyright (C) 2006-2007 Nokia Corporation + * Copyright (C) 2009 MontaVista Software, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -436,14 +437,6 @@ void musb_g_tx(struct musb *musb, u8 epnum) csr |= MUSB_TXCSR_P_WZC_BITS; csr &= ~MUSB_TXCSR_P_SENTSTALL; musb_writew(epio, MUSB_TXCSR, csr); - if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) { - dma->status = MUSB_DMA_STATUS_CORE_ABORT; - musb->dma_controller->channel_abort(dma); - } - - if (request) - musb_g_giveback(musb_ep, request, -EPIPE); - break; } @@ -582,15 +575,25 @@ void musb_g_tx(struct musb *musb, u8 epnum) */ static void rxstate(struct musb *musb, struct musb_request *req) { - u16 csr = 0; const u8 epnum = req->epnum; struct usb_request *request = &req->request; struct musb_ep *musb_ep = &musb->endpoints[epnum].ep_out; void __iomem *epio = musb->endpoints[epnum].regs; unsigned fifo_count = 0; u16 len = musb_ep->packet_sz; + u16 csr = musb_readw(epio, MUSB_RXCSR); - csr = musb_readw(epio, MUSB_RXCSR); + /* We shouldn't get here while DMA is active, but we do... */ + if (dma_channel_status(musb_ep->dma) == MUSB_DMA_STATUS_BUSY) { + DBG(4, "DMA pending...\n"); + return; + } + + if (csr & MUSB_RXCSR_P_SENDSTALL) { + DBG(5, "%s stalling, RXCSR %04x\n", + musb_ep->end_point.name, csr); + return; + } if (is_cppi_enabled() && musb_ep->dma) { struct dma_controller *c = musb->dma_controller; @@ -761,19 +764,10 @@ void musb_g_rx(struct musb *musb, u8 epnum) csr, dma ? " (dma)" : "", request); if (csr & MUSB_RXCSR_P_SENTSTALL) { - if (dma_channel_status(dma) == MUSB_DMA_STATUS_BUSY) { - dma->status = MUSB_DMA_STATUS_CORE_ABORT; - (void) musb->dma_controller->channel_abort(dma); - request->actual += musb_ep->dma->actual_len; - } - csr |= MUSB_RXCSR_P_WZC_BITS; csr &= ~MUSB_RXCSR_P_SENTSTALL; musb_writew(epio, MUSB_RXCSR, csr); - - if (request) - musb_g_giveback(musb_ep, request, -EPIPE); - goto done; + return; } if (csr & MUSB_RXCSR_P_OVERRUN) { @@ -795,7 +789,7 @@ void musb_g_rx(struct musb *musb, u8 epnum) DBG((csr & MUSB_RXCSR_DMAENAB) ? 4 : 1, "%s busy, csr %04x\n", musb_ep->end_point.name, csr); - goto done; + return; } if (dma && (csr & MUSB_RXCSR_DMAENAB)) { @@ -826,22 +820,15 @@ void musb_g_rx(struct musb *musb, u8 epnum) if ((request->actual < request->length) && (musb_ep->dma->actual_len == musb_ep->packet_sz)) - goto done; + return; #endif musb_g_giveback(musb_ep, request, 0); request = next_request(musb_ep); if (!request) - goto done; - - /* don't start more i/o till the stall clears */ - musb_ep_select(mbase, epnum); - csr = musb_readw(epio, MUSB_RXCSR); - if (csr & MUSB_RXCSR_P_SENDSTALL) - goto done; + return; } - /* analyze request if the ep is hot */ if (request) rxstate(musb, to_musb_request(request)); @@ -849,8 +836,6 @@ void musb_g_rx(struct musb *musb, u8 epnum) DBG(3, "packet waiting for %s%s request\n", musb_ep->desc ? "" : "inactive ", musb_ep->end_point.name); - -done: return; } @@ -1244,7 +1229,7 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) void __iomem *mbase; unsigned long flags; u16 csr; - struct musb_request *request = NULL; + struct musb_request *request; int status = 0; if (!ep) @@ -1260,24 +1245,29 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) musb_ep_select(mbase, epnum); - /* cannot portably stall with non-empty FIFO */ request = to_musb_request(next_request(musb_ep)); - if (value && musb_ep->is_in) { - csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) { - DBG(3, "%s fifo busy, cannot halt\n", ep->name); - spin_unlock_irqrestore(&musb->lock, flags); - return -EAGAIN; + if (value) { + if (request) { + DBG(3, "request in progress, cannot halt %s\n", + ep->name); + status = -EAGAIN; + goto done; + } + /* Cannot portably stall with non-empty FIFO */ + if (musb_ep->is_in) { + csr = musb_readw(epio, MUSB_TXCSR); + if (csr & MUSB_TXCSR_FIFONOTEMPTY) { + DBG(3, "FIFO busy, cannot halt %s\n", ep->name); + status = -EAGAIN; + goto done; + } } - } /* set/clear the stall and toggle bits */ DBG(2, "%s: %s stall\n", ep->name, value ? "set" : "clear"); if (musb_ep->is_in) { csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) - csr |= MUSB_TXCSR_FLUSHFIFO; csr |= MUSB_TXCSR_P_WZC_BITS | MUSB_TXCSR_CLRDATATOG; if (value) @@ -1300,14 +1290,13 @@ int musb_gadget_set_halt(struct usb_ep *ep, int value) musb_writew(epio, MUSB_RXCSR, csr); } -done: - /* maybe start the first request in the queue */ if (!musb_ep->busy && !value && request) { DBG(3, "restarting the request\n"); musb_ep_restart(musb, request); } +done: spin_unlock_irqrestore(&musb->lock, flags); return status; } -- cgit v0.10.2 From 0de6ab8b91f2e1e8e7fc66a8b5c5e8ca82ea16b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 17 Nov 2009 19:10:48 -0800 Subject: USB: ftdi_sio: Keep going when write errors are encountered. The use of urb->actual_length to update tx_outstanding_bytes implicitly assumes that the number of bytes actually written is the same as the number of bytes we tried to write. On error that assumption is violated so just use transfer_buffer_length the number of bytes we intended to write to the device. If an error occurs we need to fall through and call usb_serial_port_softint to wake up processes waiting in tty_wait_until_sent. Signed-off-by: Eric W. Biederman Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 9c60d6d..ebcc6d0 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1937,7 +1937,7 @@ static void ftdi_write_bulk_callback(struct urb *urb) return; } /* account for transferred data */ - countback = urb->actual_length; + countback = urb->transfer_buffer_length; data_offset = priv->write_offset; if (data_offset > 0) { /* Subtract the control bytes */ @@ -1950,7 +1950,6 @@ static void ftdi_write_bulk_callback(struct urb *urb) if (status) { dbg("nonzero write bulk status received: %d", status); - return; } usb_serial_port_softint(port); -- cgit v0.10.2 From c5deb832d7a3f9618b09e6eeaa91a1a845c90c65 Mon Sep 17 00:00:00 2001 From: Thomas Dahlmann Date: Tue, 17 Nov 2009 14:18:27 -0800 Subject: usb: amd5536udc: fixed shared interrupt bug and warning oops - fixed shared interrupt bug reported by Vadim Lobanov - fixed possible warning oops on driver unload when connected - prevent interrupt flood in PIO mode ("modprobe amd5536udc use_dma=0") when using gadget ether Signed-off-by: Thomas Dahlmann Cc: Robert Richter Cc: David Brownell Cc: stable Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index d5b6596..731150d4 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -1213,7 +1213,12 @@ udc_queue(struct usb_ep *usbep, struct usb_request *usbreq, gfp_t gfp) tmp &= AMD_UNMASK_BIT(ep->num); writel(tmp, &dev->regs->ep_irqmsk); } - } + } else if (ep->in) { + /* enable ep irq */ + tmp = readl(&dev->regs->ep_irqmsk); + tmp &= AMD_UNMASK_BIT(ep->num); + writel(tmp, &dev->regs->ep_irqmsk); + } } else if (ep->dma) { @@ -2005,18 +2010,17 @@ __acquires(dev->lock) { int tmp; - /* empty queues and init hardware */ - udc_basic_init(dev); - for (tmp = 0; tmp < UDC_EP_NUM; tmp++) { - empty_req_queue(&dev->ep[tmp]); - } - if (dev->gadget.speed != USB_SPEED_UNKNOWN) { spin_unlock(&dev->lock); driver->disconnect(&dev->gadget); spin_lock(&dev->lock); } - /* init */ + + /* empty queues and init hardware */ + udc_basic_init(dev); + for (tmp = 0; tmp < UDC_EP_NUM; tmp++) + empty_req_queue(&dev->ep[tmp]); + udc_setup_endpoints(dev); } @@ -2472,6 +2476,13 @@ static irqreturn_t udc_data_in_isr(struct udc *dev, int ep_ix) } } + } else if (!use_dma && ep->in) { + /* disable interrupt */ + tmp = readl( + &dev->regs->ep_irqmsk); + tmp |= AMD_BIT(ep->num); + writel(tmp, + &dev->regs->ep_irqmsk); } } /* clear status bits */ @@ -3279,6 +3290,17 @@ static int udc_pci_probe( goto finished; } + spin_lock_init(&dev->lock); + /* udc csr registers base */ + dev->csr = dev->virt_addr + UDC_CSR_ADDR; + /* dev registers base */ + dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; + /* ep registers base */ + dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; + /* fifo's base */ + dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); + dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); + if (request_irq(pdev->irq, udc_irq, IRQF_SHARED, name, dev) != 0) { dev_dbg(&dev->pdev->dev, "request_irq(%d) fail\n", pdev->irq); kfree(dev); @@ -3331,7 +3353,6 @@ static int udc_probe(struct udc *dev) udc_pollstall_timer.data = 0; /* device struct setup */ - spin_lock_init(&dev->lock); dev->gadget.ops = &udc_ops; dev_set_name(&dev->gadget.dev, "gadget"); @@ -3340,16 +3361,6 @@ static int udc_probe(struct udc *dev) dev->gadget.name = name; dev->gadget.is_dualspeed = 1; - /* udc csr registers base */ - dev->csr = dev->virt_addr + UDC_CSR_ADDR; - /* dev registers base */ - dev->regs = dev->virt_addr + UDC_DEVCFG_ADDR; - /* ep registers base */ - dev->ep_regs = dev->virt_addr + UDC_EPREGS_ADDR; - /* fifo's base */ - dev->rxfifo = (u32 __iomem *)(dev->virt_addr + UDC_RXFIFO_ADDR); - dev->txfifo = (u32 __iomem *)(dev->virt_addr + UDC_TXFIFO_ADDR); - /* init registers, interrupts, ... */ startup_registers(dev); -- cgit v0.10.2 From 1230435c258e34b47ab7adc3db572c88a284234a Mon Sep 17 00:00:00 2001 From: Ajay Kumar Gupta Date: Tue, 17 Nov 2009 15:22:54 +0530 Subject: USB: musb: Remove unwanted message in boot log Removes below unnecessary log of almost 28 lines during boot. musb_hdrc: hw_ep 0shared, max 64 musb_hdrc: hw_ep 1tx, max 512 musb_hdrc: hw_ep 1rx, max 512 ... ... musb_hdrc: hw_ep 13shared, max 4096 musb_hdrc: hw_ep 14shared, max 1024 musb_hdrc: hw_ep 15shared, max 1024 Signed-off-by: Ajay Kumar Gupta Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 3a61ddb..547e0e3 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1450,7 +1450,7 @@ static int __init musb_core_init(u16 musb_type, struct musb *musb) #endif if (hw_ep->max_packet_sz_tx) { - printk(KERN_DEBUG + DBG(1, "%s: hw_ep %d%s, %smax %d\n", musb_driver_name, i, hw_ep->is_shared_fifo ? "shared" : "tx", @@ -1459,7 +1459,7 @@ static int __init musb_core_init(u16 musb_type, struct musb *musb) hw_ep->max_packet_sz_tx); } if (hw_ep->max_packet_sz_rx && !hw_ep->is_shared_fifo) { - printk(KERN_DEBUG + DBG(1, "%s: hw_ep %d%s, %smax %d\n", musb_driver_name, i, "rx", -- cgit v0.10.2 From dfeffa531ccf9c31f2f55df6d7ca86eec92142df Mon Sep 17 00:00:00 2001 From: Ajay Kumar Gupta Date: Tue, 17 Nov 2009 15:22:55 +0530 Subject: USB: musb: fix ISOC Tx programming for CPPI DMAs Isochronous Tx DMA is getting programmed but never getting started for CPPI and TUSB DMAs and thus Isochronous Tx doesn't work. Fixing it by starting DMAs using musb_h_tx_dma_start(). Signed-off-by: Swaminathan S Signed-off-by: Babu Ravi Signed-off-by: Ajay Kumar Gupta Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index cf94511..e3ab40a 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -1301,8 +1301,11 @@ void musb_host_tx(struct musb *musb, u8 epnum) return; } else if (usb_pipeisoc(pipe) && dma) { if (musb_tx_dma_program(musb->dma_controller, hw_ep, qh, urb, - offset, length)) + offset, length)) { + if (is_cppi_enabled() || tusb_dma_omap()) + musb_h_tx_dma_start(hw_ep); return; + } } else if (tx_csr & MUSB_TXCSR_DMAENAB) { DBG(1, "not complete, but DMA enabled?\n"); return; -- cgit v0.10.2 From 5542bc2ac7b52c021fc9c7a96329955491b7e763 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Tue, 17 Nov 2009 15:22:56 +0530 Subject: USB: musb: respect usb_request->zero in control requests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In gadget mode the answer to a control request should be followed by a zero-length packet if the amount transferred is an exact multiple of the endpoint's packet size and the requests has its "zero" flag set. This patch prevents the request from being immediately removed from the queue when a control IN transfer ends on a full packet and "zero" is set. The next time ep0_txstate is entered, a zero-length packet is queued and the request is removed as fifo_count is 0. Signed-off-by: Daniel Glöckner Signed-off-by: Ajay Kumar Gupta Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/musb_gadget_ep0.c b/drivers/usb/musb/musb_gadget_ep0.c index 7a67786..522efb3 100644 --- a/drivers/usb/musb/musb_gadget_ep0.c +++ b/drivers/usb/musb/musb_gadget_ep0.c @@ -511,7 +511,8 @@ static void ep0_txstate(struct musb *musb) /* update the flags */ if (fifo_count < MUSB_MAX_END0_PACKET - || request->actual == request->length) { + || (request->actual == request->length + && !request->zero)) { musb->ep0_state = MUSB_EP0_STAGE_STATUSOUT; csr |= MUSB_CSR0_P_DATAEND; } else -- cgit v0.10.2 From 8d6499e5bde91ad05dea4f666bdfe79e65e7cf96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Tue, 17 Nov 2009 15:22:57 +0530 Subject: USB: musb: Fix CPPI IRQs not being signaled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On tx channel abort a cppi interrupt is generated for a short time by setting the lowest bit of the TCPPICOMPPTR register. It is then reset immediately by clearing the bit. When the interrupt handler is run, it does not detect an interrupt in the TCPPIMSKSR or RCPPIMSKSR registers and thus exits early without writing the TCPPIEOIR register. It appears that this inhibits further cppi interrupts until the handler is called by chance, f.ex. from davinci_interrupt(). By moving the unmasking of the interrupt below the writes to TCPPICOMPPTR, no interrupt is generated and no write to TCPPIEOIR is necessary. Signed-off-by: Daniel Glöckner Signed-off-by: Ajay Kumar Gupta Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index c3577bb..ef2332a 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -1442,11 +1442,6 @@ static int cppi_channel_abort(struct dma_channel *channel) musb_writew(regs, MUSB_TXCSR, value); musb_writew(regs, MUSB_TXCSR, value); - /* re-enable interrupt */ - if (enabled) - musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG, - (1 << cppi_ch->index)); - /* While we scrub the TX state RAM, ensure that we clean * up any interrupt that's currently asserted: * 1. Write to completion Ptr value 0x1(bit 0 set) @@ -1459,6 +1454,11 @@ static int cppi_channel_abort(struct dma_channel *channel) cppi_reset_tx(tx_ram, 1); musb_writel(&tx_ram->tx_complete, 0, 0); + /* re-enable interrupt */ + if (enabled) + musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG, + (1 << cppi_ch->index)); + cppi_dump_tx(5, cppi_ch, " (done teardown)"); /* REVISIT tx side _should_ clean up the same way -- cgit v0.10.2 From ee4ecb8ac63a5792bec448037d4b82ec4144f94b Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Fri, 27 Nov 2009 15:17:59 +0100 Subject: USB: work around for EHCI with quirky periodic schedules a quirky chipset needs periodic schedules to run for a minimum time before they can be disabled again. This enforces the requirement with a time stamp and a calculated delay Signed-off-by: Oliver Neukum Cc: stable Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 9835e07..f5f5601 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -676,6 +677,7 @@ static int ehci_run (struct usb_hcd *hcd) ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ msleep(5); up_write(&ehci_cf_port_reset_rwsem); + ehci->last_periodic_enable = ktime_get_real(); temp = HC_VERSION(ehci_readl(ehci, &ehci->caps->hc_capbase)); ehci_info (ehci, diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 378861b..ead5f4f 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -111,6 +111,10 @@ static int ehci_pci_setup(struct usb_hcd *hcd) switch (pdev->vendor) { case PCI_VENDOR_ID_INTEL: ehci->need_io_watchdog = 0; + if (pdev->device == 0x27cc) { + ehci->broken_periodic = 1; + ehci_info(ehci, "using broken periodic workaround\n"); + } break; case PCI_VENDOR_ID_TDI: if (pdev->device == PCI_DEVICE_ID_TDI_EHCI) { diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c index b25cdea..a5535b5 100644 --- a/drivers/usb/host/ehci-sched.c +++ b/drivers/usb/host/ehci-sched.c @@ -475,6 +475,8 @@ static int enable_periodic (struct ehci_hcd *ehci) /* make sure ehci_work scans these */ ehci->next_uframe = ehci_readl(ehci, &ehci->regs->frame_index) % (ehci->periodic_size << 3); + if (unlikely(ehci->broken_periodic)) + ehci->last_periodic_enable = ktime_get_real(); return 0; } @@ -486,6 +488,16 @@ static int disable_periodic (struct ehci_hcd *ehci) if (--ehci->periodic_sched) return 0; + if (unlikely(ehci->broken_periodic)) { + /* delay experimentally determined */ + ktime_t safe = ktime_add_us(ehci->last_periodic_enable, 1000); + ktime_t now = ktime_get_real(); + s64 delay = ktime_us_delta(safe, now); + + if (unlikely(delay > 0)) + udelay(delay); + } + /* did setting PSE not take effect yet? * takes effect only at frame boundaries... */ diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index 064e768..2d85e21 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -118,6 +118,7 @@ struct ehci_hcd { /* one per controller */ unsigned stamp; unsigned random_frame; unsigned long next_statechange; + ktime_t last_periodic_enable; u32 command; /* SILICON QUIRKS */ @@ -127,6 +128,7 @@ struct ehci_hcd { /* one per controller */ unsigned big_endian_desc:1; unsigned has_amcc_usb23:1; unsigned need_io_watchdog:1; + unsigned broken_periodic:1; /* required for usb32 quirk */ #define OHCI_CTRL_HCFS (3 << 6) -- cgit v0.10.2 From 0ec8648379334f1e127ebd5e57a625890f116824 Mon Sep 17 00:00:00 2001 From: Gernot Hillier Date: Fri, 27 Nov 2009 13:49:23 +0100 Subject: USB: Add support for Mobilcom Debitel USB UMTS Surf-Stick to option driver This patch adds the vendor and device id for the Mobilcom Debitel UMTS surf stick (a.k.a. 4G Systems XSStick W14, MobiData MBD-200HU, ...). To see these ids, you need to switch the stick to modem operation first with the help of usb_modeswitch. This makes it switch from 1c9e:f000 to 1c9e:9603 and thus be recognized by the option driver. Signed-off-by: Gernot Hillier Signed-off-by: Greg Kroah-Hartman diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 319aaf97..0577e4b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -336,6 +336,10 @@ static int option_resume(struct usb_serial *serial); #define AIRPLUS_VENDOR_ID 0x1011 #define AIRPLUS_PRODUCT_MCD650 0x3198 +/* 4G Systems products */ +#define FOUR_G_SYSTEMS_VENDOR_ID 0x1c9e +#define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 + static struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_RICOLA) }, @@ -599,6 +603,7 @@ static struct usb_device_id option_ids[] = { { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S) }, { USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) }, { USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) }, + { USB_DEVICE(FOUR_G_SYSTEMS_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v0.10.2 From 745dd2405e281d96c0a449103bdf6a895048f28c Mon Sep 17 00:00:00 2001 From: Michael Cree Date: Mon, 30 Nov 2009 22:44:40 -0500 Subject: Alpha: Rearrange thread info flags fixing two regressions The removal of the TIF_NOTIFY_RESUME flag, commit a583f1b54249b "remove unused TIF_NOTIFY_RESUME flag," resulted in incorrect setting of the unaligned access control flags by the prctl syscall. The re-addition of the TIF_NOTIFY_RESUME flag, commit d0420c83f39f "KEYS: Extend TIF_NOTIFY_RESUME to (almost) all architectures [try #6]" further caused problems, namely incorrect operands to assembler code as evidenced by: AS arch/alpha/kernel/entry.o arch/alpha/kernel/entry.S: Assembler messages: arch/alpha/kernel/entry.S:326: Warning: operand out of range (0x0000000000000406 is not between 0x0000000000000000 and 0x00000000000000ff) Both regressions fixed by (1) rearranging TIF_NOTIFY_RESUME flag to be in lower 8 bits of the thread info flags, and (2) making sure that ALPHA_UAC_SHIFT matches the rearrangement of the thread info flags. Signed-off-by: Michael Cree Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: David Howells , Signed-off-by: Matt Turner diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 815680b..b3e8886 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -61,21 +61,24 @@ register struct thread_info *__current_thread_info __asm__("$8"); /* * Thread information flags: * - these are process state flags and used from assembly - * - pending work-to-be-done flags come first to fit in and immediate operand. + * - pending work-to-be-done flags come first and must be assigned to be + * within bits 0 to 7 to fit in and immediate operand. + * - ALPHA_UAC_SHIFT below must be kept consistent with the unaligned + * control flags. * * TIF_SYSCALL_TRACE is known to be 0 via blbs. */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* poll_idle is polling NEED_RESCHED */ -#define TIF_DIE_IF_KERNEL 4 /* dik recursion lock */ -#define TIF_UAC_NOPRINT 5 /* see sysinfo.h */ -#define TIF_UAC_NOFIX 6 -#define TIF_UAC_SIGBUS 7 -#define TIF_MEMDIE 8 -#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ -#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 8 /* poll_idle is polling NEED_RESCHED */ +#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */ +#define TIF_UAC_NOPRINT 10 /* see sysinfo.h */ +#define TIF_UAC_NOFIX 11 +#define TIF_UAC_SIGBUS 12 +#define TIF_MEMDIE 13 +#define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */ #define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1< Date: Mon, 30 Nov 2009 22:51:31 -0500 Subject: alpha: Fixup last users of irq_chip->typename The typename member of struct irq_chip was kept for migration purposes and is obsolete since more than 2 years. Fix up the leftovers. Signed-off-by: Thomas Gleixner Cc: Richard Henderson Cc: linux-alpha@vger.kernel.org Signed-off-by: Matt Turner diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index cc78346..c0de072 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -92,7 +92,7 @@ show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j)); #endif - seq_printf(p, " %14s", irq_desc[irq].chip->typename); + seq_printf(p, " %14s", irq_desc[irq].chip->name); seq_printf(p, " %c%s", (action->flags & IRQF_DISABLED)?'+':' ', action->name); diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c index 38c805d..cfde865 100644 --- a/arch/alpha/kernel/irq_alpha.c +++ b/arch/alpha/kernel/irq_alpha.c @@ -228,7 +228,7 @@ struct irqaction timer_irqaction = { }; static struct irq_chip rtc_irq_type = { - .typename = "RTC", + .name = "RTC", .startup = rtc_startup, .shutdown = rtc_enable_disable, .enable = rtc_enable_disable, diff --git a/arch/alpha/kernel/irq_i8259.c b/arch/alpha/kernel/irq_i8259.c index 50bfec9..83a9ac2 100644 --- a/arch/alpha/kernel/irq_i8259.c +++ b/arch/alpha/kernel/irq_i8259.c @@ -84,7 +84,7 @@ i8259a_end_irq(unsigned int irq) } struct irq_chip i8259a_irq_type = { - .typename = "XT-PIC", + .name = "XT-PIC", .startup = i8259a_startup_irq, .shutdown = i8259a_disable_irq, .enable = i8259a_enable_irq, diff --git a/arch/alpha/kernel/irq_pyxis.c b/arch/alpha/kernel/irq_pyxis.c index 69199a7..989ce46 100644 --- a/arch/alpha/kernel/irq_pyxis.c +++ b/arch/alpha/kernel/irq_pyxis.c @@ -71,7 +71,7 @@ pyxis_mask_and_ack_irq(unsigned int irq) } static struct irq_chip pyxis_irq_type = { - .typename = "PYXIS", + .name = "PYXIS", .startup = pyxis_startup_irq, .shutdown = pyxis_disable_irq, .enable = pyxis_enable_irq, diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c index 8522936..d63e93e 100644 --- a/arch/alpha/kernel/irq_srm.c +++ b/arch/alpha/kernel/irq_srm.c @@ -49,7 +49,7 @@ srm_end_irq(unsigned int irq) /* Handle interrupts from the SRM, assuming no additional weirdness. */ static struct irq_chip srm_irq_type = { - .typename = "SRM", + .name = "SRM", .startup = srm_startup_irq, .shutdown = srm_disable_irq, .enable = srm_enable_irq, diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index 382035e..20a30b8 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -90,7 +90,7 @@ alcor_end_irq(unsigned int irq) } static struct irq_chip alcor_irq_type = { - .typename = "ALCOR", + .name = "ALCOR", .startup = alcor_startup_irq, .shutdown = alcor_disable_irq, .enable = alcor_enable_irq, diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index ed34943..affd0f3 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -72,7 +72,7 @@ cabriolet_end_irq(unsigned int irq) } static struct irq_chip cabriolet_irq_type = { - .typename = "CABRIOLET", + .name = "CABRIOLET", .startup = cabriolet_startup_irq, .shutdown = cabriolet_disable_irq, .enable = cabriolet_enable_irq, diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 46e70ec..d64e1e4 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -199,7 +199,7 @@ clipper_set_affinity(unsigned int irq, const struct cpumask *affinity) } static struct irq_chip dp264_irq_type = { - .typename = "DP264", + .name = "DP264", .startup = dp264_startup_irq, .shutdown = dp264_disable_irq, .enable = dp264_enable_irq, @@ -210,7 +210,7 @@ static struct irq_chip dp264_irq_type = { }; static struct irq_chip clipper_irq_type = { - .typename = "CLIPPER", + .name = "CLIPPER", .startup = clipper_startup_irq, .shutdown = clipper_disable_irq, .enable = clipper_enable_irq, diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index 660c23e..df2090c 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -70,7 +70,7 @@ eb64p_end_irq(unsigned int irq) } static struct irq_chip eb64p_irq_type = { - .typename = "EB64P", + .name = "EB64P", .startup = eb64p_startup_irq, .shutdown = eb64p_disable_irq, .enable = eb64p_enable_irq, diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index b99ea48..3ca1dbc 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -81,7 +81,7 @@ eiger_end_irq(unsigned int irq) } static struct irq_chip eiger_irq_type = { - .typename = "EIGER", + .name = "EIGER", .startup = eiger_startup_irq, .shutdown = eiger_disable_irq, .enable = eiger_enable_irq, diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index ef0b83a..7a7ae36 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -119,7 +119,7 @@ jensen_local_end(unsigned int irq) } static struct irq_chip jensen_local_irq_type = { - .typename = "LOCAL", + .name = "LOCAL", .startup = jensen_local_startup, .shutdown = jensen_local_shutdown, .enable = jensen_local_enable, diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index bbfc4f2..0bb3b5c 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -170,7 +170,7 @@ marvel_irq_noop_return(unsigned int irq) } static struct irq_chip marvel_legacy_irq_type = { - .typename = "LEGACY", + .name = "LEGACY", .startup = marvel_irq_noop_return, .shutdown = marvel_irq_noop, .enable = marvel_irq_noop, @@ -180,7 +180,7 @@ static struct irq_chip marvel_legacy_irq_type = { }; static struct irq_chip io7_lsi_irq_type = { - .typename = "LSI", + .name = "LSI", .startup = io7_startup_irq, .shutdown = io7_disable_irq, .enable = io7_enable_irq, @@ -190,7 +190,7 @@ static struct irq_chip io7_lsi_irq_type = { }; static struct irq_chip io7_msi_irq_type = { - .typename = "MSI", + .name = "MSI", .startup = io7_startup_irq, .shutdown = io7_disable_irq, .enable = io7_enable_irq, diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index 4e36664..ee88651 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -69,7 +69,7 @@ mikasa_end_irq(unsigned int irq) } static struct irq_chip mikasa_irq_type = { - .typename = "MIKASA", + .name = "MIKASA", .startup = mikasa_startup_irq, .shutdown = mikasa_disable_irq, .enable = mikasa_enable_irq, diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index 35753a1..86503fe 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -74,7 +74,7 @@ noritake_end_irq(unsigned int irq) } static struct irq_chip noritake_irq_type = { - .typename = "NORITAKE", + .name = "NORITAKE", .startup = noritake_startup_irq, .shutdown = noritake_disable_irq, .enable = noritake_enable_irq, diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index f3aec7e..26c322b 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -136,7 +136,7 @@ rawhide_end_irq(unsigned int irq) } static struct irq_chip rawhide_irq_type = { - .typename = "RAWHIDE", + .name = "RAWHIDE", .startup = rawhide_startup_irq, .shutdown = rawhide_disable_irq, .enable = rawhide_enable_irq, diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index fc92463..be16112 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -73,7 +73,7 @@ rx164_end_irq(unsigned int irq) } static struct irq_chip rx164_irq_type = { - .typename = "RX164", + .name = "RX164", .startup = rx164_startup_irq, .shutdown = rx164_disable_irq, .enable = rx164_enable_irq, diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 426eb69..b2abe27 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -502,7 +502,7 @@ sable_lynx_mask_and_ack_irq(unsigned int irq) } static struct irq_chip sable_lynx_irq_type = { - .typename = "SABLE/LYNX", + .name = "SABLE/LYNX", .startup = sable_lynx_startup_irq, .shutdown = sable_lynx_disable_irq, .enable = sable_lynx_enable_irq, diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index 830318c..2304648 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -75,7 +75,7 @@ takara_end_irq(unsigned int irq) } static struct irq_chip takara_irq_type = { - .typename = "TAKARA", + .name = "TAKARA", .startup = takara_startup_irq, .shutdown = takara_disable_irq, .enable = takara_enable_irq, diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index 88978fc..2880533 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -195,7 +195,7 @@ init_titan_irqs(struct irq_chip * ops, int imin, int imax) } static struct irq_chip titan_irq_type = { - .typename = "TITAN", + .name = "TITAN", .startup = titan_startup_irq, .shutdown = titan_disable_irq, .enable = titan_enable_irq, diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index e91b4c3..62fd972 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -158,7 +158,7 @@ wildfire_end_irq(unsigned int irq) } static struct irq_chip wildfire_irq_type = { - .typename = "WILDFIRE", + .name = "WILDFIRE", .startup = wildfire_startup_irq, .shutdown = wildfire_disable_irq, .enable = wildfire_enable_irq, -- cgit v0.10.2 From d0e260782c3702a009645c3caa02e381dab8798b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 1 Dec 2009 17:30:59 +1100 Subject: md: revert incorrect fix for read error handling in raid1. commit 4706b349f was a forward port of a fix that was needed for SLES10. But in fact it is not needed in mainline because the earlier commit dd00a99e7a fixes the same problem in a better way. Further, this commit introduces a bug in the way it interacts with the automatic read-error-correction. If, after a read error is successfully corrected, the same disk is chosen to re-read - the re-read won't be attempted but an error will be returned instead. After reverting that commit, there is the possibility that a read error on a read-only array (where read errors cannot be corrected as that requires a write) will repeatedly read the same device and continue to get an error. So in the "Array is readonly" case, fail the drive immediately on a read error. Signed-off-by: NeilBrown Cc: stable@kernel.org diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a053423..e07ce2e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1650,11 +1650,12 @@ static void raid1d(mddev_t *mddev) r1_bio->sector, r1_bio->sectors); unfreeze_array(conf); - } + } else + md_error(mddev, + conf->mirrors[r1_bio->read_disk].rdev); bio = r1_bio->bios[r1_bio->read_disk]; - if ((disk=read_balance(conf, r1_bio)) == -1 || - disk == r1_bio->read_disk) { + if ((disk=read_balance(conf, r1_bio)) == -1) { printk(KERN_ALERT "raid1: %s: unrecoverable I/O" " read error for block %llu\n", bdevname(bio->bi_bdev,b), -- cgit v0.10.2 From bab81b624e970f1138535a465ad2b26b6bb0dd6c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 1 Dec 2009 14:04:49 +0800 Subject: perf annotate: Fix perf data parsing perf-annotate doesn't parse perf.data correctly in that it doesn't read perf header. Fix this by using mmap_dispatch_perf_file(). Before: TOTAL events: 17565 MMAP events: 3221 LOST events: 10 COMM events: 235 EXIT events: 2 THROTTLE events: 1 UNTHROTTLE events: 2 FORK events: 10 READ events: 1 SAMPLE events: 14083 After: TOTAL events: 17290 MMAP events: 3203 LOST events: 0 COMM events: 234 EXIT events: 1 THROTTLE events: 0 UNTHROTTLE events: 0 FORK events: 0 READ events: 0 SAMPLE events: 13852 Signed-off-by: Li Zefan Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Arjan van de Ven LKML-Reference: <4B14B201.9030708@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7f85c6e..0bf2e8f 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -25,19 +25,16 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" +#include "util/data_map.h" static char const *input_name = "perf.data"; static int force; -static int input; static int full_paths; static int print_line; -static unsigned long page_size; -static unsigned long mmap_window = 32; - struct sym_hist { u64 sum; u64 ip[0]; @@ -156,35 +153,6 @@ static int process_sample_event(event_t *event) return 0; } -static int event__process(event_t *self) -{ - switch (self->header.type) { - case PERF_RECORD_SAMPLE: - return process_sample_event(self); - - case PERF_RECORD_MMAP: - return event__process_mmap(self); - - case PERF_RECORD_COMM: - return event__process_comm(self); - - case PERF_RECORD_FORK: - return event__process_task(self); - /* - * We dont process them right now but they are fine: - */ - - case PERF_RECORD_THROTTLE: - case PERF_RECORD_UNTHROTTLE: - return 0; - - default: - return -1; - } - - return 0; -} - static int parse_line(FILE *file, struct hist_entry *he, u64 len) { struct symbol *sym = he->sym; @@ -485,99 +453,26 @@ static void find_annotations(void) } } +static struct perf_file_handler file_handler = { + .process_sample_event = process_sample_event, + .process_mmap_event = event__process_mmap, + .process_comm_event = event__process_comm, + .process_fork_event = event__process_task, +}; + static int __cmd_annotate(void) { - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head = 0; - struct stat input_stat; - event_t *event; - uint32_t size; - char *buf; - - register_idle_thread(); - - input = open(input_name, O_RDONLY); - if (input < 0) { - perror("failed to open file"); - exit(-1); - } - - ret = fstat(input, &input_stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user or root\n", input_name); - exit(-1); - } - - if (!input_stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); - int munmap_ret; - - munmap_ret = munmap(buf, page_size * mmap_window); - assert(munmap_ret == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - dump_printf("%p [%p]: event: %d\n", - (void *)(offset + head), - (void *)(long)event->header.size, - event->header.type); - - if (!size || event__process(event) < 0) { - - dump_printf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; - } - - head += size; - - if (offset + head < (unsigned long)input_stat.st_size) - goto more; + struct perf_header *header; + struct thread *idle; + int ret; - rc = EXIT_SUCCESS; - close(input); + idle = register_idle_thread(); + register_perf_file_handler(&file_handler); + ret = mmap_dispatch_perf_file(&header, input_name, 0, 0, + &event__cwdlen, &event__cwd); + if (ret) + return ret; if (dump_trace) { event__print_totals(); @@ -595,7 +490,7 @@ more: find_annotations(); - return rc; + return ret; } static const char * const annotate_usage[] = { @@ -644,8 +539,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) if (symbol__init(&symbol_conf) < 0) return -1; - page_size = getpagesize(); - argc = parse_options(argc, argv, options, annotate_usage, 0); setup_sorting(); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 233d7ad..414b89d 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -186,8 +186,7 @@ int event__process_comm(event_t *self) { struct thread *thread = threads__findnew(self->comm.pid); - dump_printf("PERF_RECORD_COMM: %s:%d\n", - self->comm.comm, self->comm.pid); + dump_printf(": %s:%d\n", self->comm.comm, self->comm.pid); if (thread == NULL || thread__set_comm(thread, self->comm.comm)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); -- cgit v0.10.2 From 5cbd08056142dcb2aea0dca7261afcb810a63c55 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 1 Dec 2009 14:05:16 +0800 Subject: perf timechart: Remove open-coded event parsing code Convert builtin-timechart.c to mmap_dispatch_perf_file() + perf_file_handler. Signed-off-by: Li Zefan Acked-by: Arjan van de Ven Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra LKML-Reference: <4B14B21C.2040406@cn.fujitsu.com> [ v2: cleaned up the printout, fixed a whitespace detail ] Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index dd4d82a..cb58b66 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -29,14 +29,14 @@ #include "util/header.h" #include "util/parse-options.h" #include "util/parse-events.h" +#include "util/event.h" +#include "util/data_map.h" #include "util/svghelper.h" static char const *input_name = "perf.data"; static char const *output_name = "output.svg"; -static unsigned long page_size; -static unsigned long mmap_window = 32; static u64 sample_type; static unsigned int numcpus; @@ -49,8 +49,6 @@ static u64 first_time, last_time; static int power_only; -static struct perf_header *header; - struct per_pid; struct per_pidcomm; @@ -156,9 +154,9 @@ struct sample_wrapper *all_samples; struct process_filter; struct process_filter { - char *name; - int pid; - struct process_filter *next; + char *name; + int pid; + struct process_filter *next; }; static struct process_filter *process_filter; @@ -1045,36 +1043,6 @@ static void write_svg_file(const char *filename) svg_close(); } -static int -process_event(event_t *event) -{ - - switch (event->header.type) { - - case PERF_RECORD_COMM: - return process_comm_event(event); - case PERF_RECORD_FORK: - return process_fork_event(event); - case PERF_RECORD_EXIT: - return process_exit_event(event); - case PERF_RECORD_SAMPLE: - return queue_sample_event(event); - - /* - * We dont process them right now but they are fine: - */ - case PERF_RECORD_MMAP: - case PERF_RECORD_THROTTLE: - case PERF_RECORD_UNTHROTTLE: - return 0; - - default: - return -1; - } - - return 0; -} - static void process_samples(void) { struct sample_wrapper *cursor; @@ -1090,114 +1058,38 @@ static void process_samples(void) } } - -static int __cmd_timechart(void) +static int sample_type_check(u64 type) { - int err, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head, shift; - struct stat statbuf; - event_t *event; - uint32_t size; - char *buf; - int input; - - input = open(input_name, O_RDONLY); - if (input < 0) { - fprintf(stderr, " failed to open file: %s", input_name); - if (!strcmp(input_name, "perf.data")) - fprintf(stderr, " (try 'perf record' first)"); - fprintf(stderr, "\n"); - exit(-1); - } - - err = fstat(input, &statbuf); - if (err < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!statbuf.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } + sample_type = type; - header = perf_header__new(); - if (header == NULL) - return -ENOMEM; - - err = perf_header__read(header, input); - if (err < 0) { - perf_header__delete(header); - return err; - } - - head = header->data_offset; - - sample_type = perf_header__sample_type(header); - - shift = page_size * (head / page_size); - offset += shift; - head -= shift; - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - int ret2; - - shift = page_size * (head / page_size); - - ret2 = munmap(buf, page_size * mmap_window); - assert(ret2 == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - if (!size || process_event(event) < 0) { - pr_warning("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; + if (!(sample_type & PERF_SAMPLE_RAW)) { + fprintf(stderr, "No trace samples found in the file.\n" + "Have you used 'perf timechart record' to record it?\n"); + return -1; } - head += size; + return 0; +} - if (offset + head >= header->data_offset + header->data_size) - goto done; +static struct perf_file_handler file_handler = { + .process_comm_event = process_comm_event, + .process_fork_event = process_fork_event, + .process_exit_event = process_exit_event, + .process_sample_event = queue_sample_event, + .sample_type_check = sample_type_check, +}; - if (offset + head < (unsigned long)statbuf.st_size) - goto more; +static int __cmd_timechart(void) +{ + struct perf_header *header; + int ret; -done: - rc = EXIT_SUCCESS; - close(input); + register_perf_file_handler(&file_handler); + ret = mmap_dispatch_perf_file(&header, input_name, 0, 0, + &event__cwdlen, &event__cwd); + if (ret) + return EXIT_FAILURE; process_samples(); @@ -1210,7 +1102,7 @@ done: pr_info("Written %2.1f seconds of trace to %s.\n", (last_time - first_time) / 1000000000.0, output_name); - return rc; + return EXIT_SUCCESS; } static const char * const timechart_usage[] = { @@ -1277,8 +1169,6 @@ int cmd_timechart(int argc, const char **argv, const char *prefix __used) { symbol__init(0); - page_size = getpagesize(); - argc = parse_options(argc, argv, options, timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); -- cgit v0.10.2 From ba8665d7dd95eb6093ee06f8f624b6acb1e73206 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:19:20 -0500 Subject: trace_kprobes: Fix a memory leak bug and check kstrdup() return value Fix a memory leak case in create_trace_probe(). When an argument is too long (> MAX_ARGSTR_LEN), it just jumps to error path. In that case tp->args[i].name is not released. This also fixes a bug to check kstrdup()'s return value. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201001919.10235.56455.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 72d0c65..aff5f80 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -483,7 +483,8 @@ static int parse_probe_vars(char *arg, struct fetch_func *ff, int is_return) return ret; } -static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +/* Recursive argument parser */ +static int __parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) { int ret = 0; unsigned long param; @@ -543,7 +544,7 @@ static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) if (!id) return -ENOMEM; id->offset = offset; - ret = parse_probe_arg(arg, &id->orig, is_return); + ret = __parse_probe_arg(arg, &id->orig, is_return); if (ret) kfree(id); else { @@ -560,6 +561,16 @@ static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) return ret; } +/* String length checking wrapper */ +static int parse_probe_arg(char *arg, struct fetch_func *ff, int is_return) +{ + if (strlen(arg) > MAX_ARGSTR_LEN) { + pr_info("Argument is too long.: %s\n", arg); + return -ENOSPC; + } + return __parse_probe_arg(arg, ff, is_return); +} + /* Return 1 if name is reserved or already used by another argument */ static int conflict_field_name(const char *name, struct probe_arg *args, int narg) @@ -698,13 +709,14 @@ static int create_trace_probe(int argc, char **argv) } tp->args[i].name = kstrdup(argv[i], GFP_KERNEL); - - /* Parse fetch argument */ - if (strlen(arg) > MAX_ARGSTR_LEN) { - pr_info("Argument%d(%s) is too long.\n", i, arg); - ret = -ENOSPC; + if (!tp->args[i].name) { + pr_info("Failed to allocate argument%d name '%s'.\n", + i, argv[i]); + ret = -ENOMEM; goto error; } + + /* Parse fetch argument */ ret = parse_probe_arg(arg, &tp->args[i].fetch, is_return); if (ret) { pr_info("Parse error at argument%d. (%d)\n", i, ret); -- cgit v0.10.2 From f41b1e43c41e99c39a2222578a7806032c045c79 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:19:27 -0500 Subject: perf probe: Change a debugging message from pr_info to pr_debug Change annoying debug-info using notice from pr_info() to pr_debug(), since the message always printed when user adds a probe point which requires debug-info. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201001927.10235.63645.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index a2f6daf..4e418af 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -351,7 +351,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) #ifdef NO_LIBDWARF semantic_error("Debuginfo-analysis is not supported"); #else /* !NO_LIBDWARF */ - pr_info("Some probes require debuginfo.\n"); + pr_debug("Some probes require debuginfo.\n"); if (session.vmlinux) fd = open(session.vmlinux, O_RDONLY); -- cgit v0.10.2 From 57d250df7deb3e1742fbf3cc3230119731109552 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:19:34 -0500 Subject: perf probe: Add probe-finder.h without libdwarf Add probe-finder.h as LIB_H without libdwarf, because that header is included even if no libdwarf. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201001934.10235.44656.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index f1537a9..76e4b04 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -369,6 +369,7 @@ LIB_H += util/sort.h LIB_H += util/hist.h LIB_H += util/thread.h LIB_H += util/data_map.h +LIB_H += util/probe-finder.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -485,7 +486,6 @@ ifneq ($(shell sh -c "(echo '\#include '; echo '\#include Date: Mon, 30 Nov 2009 19:19:43 -0500 Subject: perf probe: Fix argv array size in probe parser Since the syntax has been changed, probe definition needs parameters less than MAX_PROBE_ARGS + 1 (probe-point + arguments). Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201001943.10235.80367.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 4e418af..510fdd4 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -151,7 +151,7 @@ static void parse_probe_point(char *arg, struct probe_point *pp) /* Parse an event definition. Note that any error must die. */ static void parse_probe_event(const char *str) { - char *argv[MAX_PROBE_ARGS + 2]; /* Event + probe + args */ + char *argv[MAX_PROBE_ARGS + 1]; /* probe + args */ int argc, i; struct probe_point *pp = &session.probes[session.nr_probe]; @@ -169,6 +169,9 @@ static void parse_probe_event(const char *str) /* Add an argument */ if (*str != '\0') { const char *s = str; + /* Check the limit number of arguments */ + if (argc == MAX_PROBE_ARGS + 1) + semantic_error("Too many arguments"); /* Skip the argument */ while (!isspace(*str) && *str != '\0') @@ -178,9 +181,9 @@ static void parse_probe_event(const char *str) argv[argc] = strndup(s, str - s); if (argv[argc] == NULL) die("strndup"); - if (++argc == MAX_PROBE_ARGS) - semantic_error("Too many arguments"); - pr_debug("argv[%d]=%s\n", argc, argv[argc - 1]); + pr_debug("argv[%d]=%s\n", argc, argv[argc]); + argc++; + } } while (*str != '\0'); if (!argc) -- cgit v0.10.2 From 934b1f5fd0c9a2ddde5a4487695c126243d9a42b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:19:51 -0500 Subject: perf probe: Fix probe array index for multiple probe points Fix the index of formatted probe array for multiple probe points, which should be probes[i] instead of probes[0]. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201001950.10235.54781.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 510fdd4..5f47e62 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -428,7 +428,7 @@ end_dwarf: pp->retprobe ? 'r' : 'p', PERFPROBE_GROUP, pp->function, pp->offset, i, - pp->probes[0]); + pp->probes[i]); write_new_event(fd, buf); } } -- cgit v0.10.2 From 50656eec82684d03add0f4f4b4875a20bd8f9755 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:19:58 -0500 Subject: perf probe: Move probe event utility functions to probe-event.c Split probe event (kprobe-events and perf probe events) utility functions from builtin-probe.c to probe-event.c. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201001958.10235.90243.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 76e4b04..f8537cf 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -370,6 +370,7 @@ LIB_H += util/hist.h LIB_H += util/thread.h LIB_H += util/data_map.h LIB_H += util/probe-finder.h +LIB_H += util/probe-event.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -412,6 +413,7 @@ LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o LIB_OBJS += util/data_map.o +LIB_OBJS += util/probe-event.o BUILTIN_OBJS += builtin-annotate.o diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 5f47e62..bf20df2 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -40,6 +40,7 @@ #include "util/parse-options.h" #include "util/parse-events.h" /* For debugfs_path */ #include "util/probe-finder.h" +#include "util/probe-event.h" /* Default vmlinux search paths */ #define NR_SEARCH_PATH 3 @@ -51,8 +52,6 @@ const char *default_search_path[NR_SEARCH_PATH] = { #define MAX_PATH_LEN 256 #define MAX_PROBES 128 -#define MAX_PROBE_ARGS 128 -#define PERFPROBE_GROUP "probe" /* Session management structure */ static struct { @@ -63,155 +62,17 @@ static struct { struct probe_point probes[MAX_PROBES]; } session; -#define semantic_error(msg ...) die("Semantic error :" msg) - -/* Parse probe point. Return 1 if return probe */ -static void parse_probe_point(char *arg, struct probe_point *pp) -{ - char *ptr, *tmp; - char c, nc = 0; - /* - * - * perf probe SRC:LN - * perf probe FUNC[+OFFS|%return][@SRC] - */ - - ptr = strpbrk(arg, ":+@%"); - if (ptr) { - nc = *ptr; - *ptr++ = '\0'; - } - - /* Check arg is function or file and copy it */ - if (strchr(arg, '.')) /* File */ - pp->file = strdup(arg); - else /* Function */ - pp->function = strdup(arg); - DIE_IF(pp->file == NULL && pp->function == NULL); - - /* Parse other options */ - while (ptr) { - arg = ptr; - c = nc; - ptr = strpbrk(arg, ":+@%"); - if (ptr) { - nc = *ptr; - *ptr++ = '\0'; - } - switch (c) { - case ':': /* Line number */ - pp->line = strtoul(arg, &tmp, 0); - if (*tmp != '\0') - semantic_error("There is non-digit charactor" - " in line number."); - break; - case '+': /* Byte offset from a symbol */ - pp->offset = strtoul(arg, &tmp, 0); - if (*tmp != '\0') - semantic_error("There is non-digit charactor" - " in offset."); - break; - case '@': /* File name */ - if (pp->file) - semantic_error("SRC@SRC is not allowed."); - pp->file = strdup(arg); - DIE_IF(pp->file == NULL); - if (ptr) - semantic_error("@SRC must be the last " - "option."); - break; - case '%': /* Probe places */ - if (strcmp(arg, "return") == 0) { - pp->retprobe = 1; - } else /* Others not supported yet */ - semantic_error("%%%s is not supported.", arg); - break; - default: - DIE_IF("Program has a bug."); - break; - } - } - - /* Exclusion check */ - if (pp->line && pp->offset) - semantic_error("Offset can't be used with line number."); - if (!pp->line && pp->file && !pp->function) - semantic_error("File always requires line number."); - if (pp->offset && !pp->function) - semantic_error("Offset requires an entry function."); - if (pp->retprobe && !pp->function) - semantic_error("Return probe requires an entry function."); - if ((pp->offset || pp->line) && pp->retprobe) - semantic_error("Offset/Line can't be used with return probe."); - - pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n", - pp->function, pp->file, pp->line, pp->offset, pp->retprobe); -} - /* Parse an event definition. Note that any error must die. */ static void parse_probe_event(const char *str) { - char *argv[MAX_PROBE_ARGS + 1]; /* probe + args */ - int argc, i; struct probe_point *pp = &session.probes[session.nr_probe]; pr_debug("probe-definition(%d): %s\n", session.nr_probe, str); if (++session.nr_probe == MAX_PROBES) - semantic_error("Too many probes"); - - /* Separate arguments, similar to argv_split */ - argc = 0; - do { - /* Skip separators */ - while (isspace(*str)) - str++; - - /* Add an argument */ - if (*str != '\0') { - const char *s = str; - /* Check the limit number of arguments */ - if (argc == MAX_PROBE_ARGS + 1) - semantic_error("Too many arguments"); - - /* Skip the argument */ - while (!isspace(*str) && *str != '\0') - str++; - - /* Duplicate the argument */ - argv[argc] = strndup(s, str - s); - if (argv[argc] == NULL) - die("strndup"); - pr_debug("argv[%d]=%s\n", argc, argv[argc]); - argc++; - - } - } while (*str != '\0'); - if (!argc) - semantic_error("An empty argument."); - - /* Parse probe point */ - parse_probe_point(argv[0], pp); - free(argv[0]); - if (pp->file || pp->line) - session.need_dwarf = 1; - - /* Copy arguments */ - pp->nr_args = argc - 1; - if (pp->nr_args > 0) { - pp->args = (char **)malloc(sizeof(char *) * pp->nr_args); - if (!pp->args) - die("malloc"); - memcpy(pp->args, &argv[1], sizeof(char *) * pp->nr_args); - } + die("Too many probes (> %d) are specified.", MAX_PROBES); - /* Ensure return probe has no C argument */ - for (i = 0; i < pp->nr_args; i++) - if (is_c_varname(pp->args[i])) { - if (pp->retprobe) - semantic_error("You can't specify local" - " variable for kretprobe"); - session.need_dwarf = 1; - } + /* Parse perf-probe event into probe_point */ + session.need_dwarf = parse_perf_probe_event(str, pp); pr_debug("%d arguments\n", pp->nr_args); } @@ -288,59 +149,15 @@ static const struct option options[] = { "\t\tALN:\tAbsolute line number in file.\n" "\t\tARG:\tProbe argument (local variable name or\n" #endif - "\t\t\tkprobe-tracer argument format is supported.)\n", + "\t\t\tkprobe-tracer argument format.)\n", opt_add_probe_event), OPT_END() }; -static int write_new_event(int fd, const char *buf) -{ - int ret; - - ret = write(fd, buf, strlen(buf)); - if (ret <= 0) - die("Failed to create event."); - else - printf("Added new event: %s\n", buf); - - return ret; -} - -#define MAX_CMDLEN 256 - -static int synthesize_probe_event(struct probe_point *pp) -{ - char *buf; - int i, len, ret; - pp->probes[0] = buf = zalloc(MAX_CMDLEN); - if (!buf) - die("Failed to allocate memory by zalloc."); - ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); - if (ret <= 0 || ret >= MAX_CMDLEN) - goto error; - len = ret; - - for (i = 0; i < pp->nr_args; i++) { - ret = snprintf(&buf[len], MAX_CMDLEN - len, " %s", - pp->args[i]); - if (ret <= 0 || ret >= MAX_CMDLEN - len) - goto error; - len += ret; - } - pp->found = 1; - return pp->found; -error: - free(pp->probes[0]); - if (ret > 0) - ret = -E2BIG; - return ret; -} - int cmd_probe(int argc, const char **argv, const char *prefix __used) { int i, j, fd, ret; struct probe_point *pp; - char buf[MAX_CMDLEN]; argc = parse_options(argc, argv, options, probe_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -352,7 +169,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) if (session.need_dwarf) #ifdef NO_LIBDWARF - semantic_error("Debuginfo-analysis is not supported"); + die("Debuginfo-analysis is not supported"); #else /* !NO_LIBDWARF */ pr_debug("Some probes require debuginfo.\n"); @@ -398,41 +215,15 @@ end_dwarf: if (pp->found) /* This probe is already found. */ continue; - ret = synthesize_probe_event(pp); + ret = synthesize_trace_kprobe_event(pp); if (ret == -E2BIG) - semantic_error("probe point is too long."); + die("probe point definition becomes too long."); else if (ret < 0) die("Failed to synthesize a probe point."); } /* Settng up probe points */ - snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); - fd = open(buf, O_WRONLY, O_APPEND); - if (fd < 0) { - if (errno == ENOENT) - die("kprobe_events file does not exist - please rebuild with CONFIG_KPROBE_TRACER."); - else - die("Could not open kprobe_events file: %s", - strerror(errno)); - } - for (j = 0; j < session.nr_probe; j++) { - pp = &session.probes[j]; - if (pp->found == 1) { - snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x %s\n", - pp->retprobe ? 'r' : 'p', PERFPROBE_GROUP, - pp->function, pp->offset, pp->probes[0]); - write_new_event(fd, buf); - } else - for (i = 0; i < pp->found; i++) { - snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x_%d %s\n", - pp->retprobe ? 'r' : 'p', - PERFPROBE_GROUP, - pp->function, pp->offset, i, - pp->probes[i]); - write_new_event(fd, buf); - } - } - close(fd); + add_trace_kprobe_events(session.probes, session.nr_probe); return 0; } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c new file mode 100644 index 0000000..7335a3b --- /dev/null +++ b/tools/perf/util/probe-event.c @@ -0,0 +1,273 @@ +/* + * probe-event.c : perf-probe definition to kprobe_events format converter + * + * Written by Masami Hiramatsu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#undef _GNU_SOURCE +#include "event.h" +#include "debug.h" +#include "parse-events.h" /* For debugfs_path */ +#include "probe-event.h" + +#define MAX_CMDLEN 256 +#define MAX_PROBE_ARGS 128 +#define PERFPROBE_GROUP "probe" + +#define semantic_error(msg ...) die("Semantic error :" msg) + +/* Parse probepoint definition. */ +static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) +{ + char *ptr, *tmp; + char c, nc = 0; + /* + * + * perf probe SRC:LN + * perf probe FUNC[+OFFS|%return][@SRC] + */ + + ptr = strpbrk(arg, ":+@%"); + if (ptr) { + nc = *ptr; + *ptr++ = '\0'; + } + + /* Check arg is function or file and copy it */ + if (strchr(arg, '.')) /* File */ + pp->file = strdup(arg); + else /* Function */ + pp->function = strdup(arg); + DIE_IF(pp->file == NULL && pp->function == NULL); + + /* Parse other options */ + while (ptr) { + arg = ptr; + c = nc; + ptr = strpbrk(arg, ":+@%"); + if (ptr) { + nc = *ptr; + *ptr++ = '\0'; + } + switch (c) { + case ':': /* Line number */ + pp->line = strtoul(arg, &tmp, 0); + if (*tmp != '\0') + semantic_error("There is non-digit charactor" + " in line number."); + break; + case '+': /* Byte offset from a symbol */ + pp->offset = strtoul(arg, &tmp, 0); + if (*tmp != '\0') + semantic_error("There is non-digit charactor" + " in offset."); + break; + case '@': /* File name */ + if (pp->file) + semantic_error("SRC@SRC is not allowed."); + pp->file = strdup(arg); + DIE_IF(pp->file == NULL); + if (ptr) + semantic_error("@SRC must be the last " + "option."); + break; + case '%': /* Probe places */ + if (strcmp(arg, "return") == 0) { + pp->retprobe = 1; + } else /* Others not supported yet */ + semantic_error("%%%s is not supported.", arg); + break; + default: + DIE_IF("Program has a bug."); + break; + } + } + + /* Exclusion check */ + if (pp->line && pp->offset) + semantic_error("Offset can't be used with line number."); + + if (!pp->line && pp->file && !pp->function) + semantic_error("File always requires line number."); + + if (pp->offset && !pp->function) + semantic_error("Offset requires an entry function."); + + if (pp->retprobe && !pp->function) + semantic_error("Return probe requires an entry function."); + + if ((pp->offset || pp->line) && pp->retprobe) + semantic_error("Offset/Line can't be used with return probe."); + + pr_debug("symbol:%s file:%s line:%d offset:%d, return:%d\n", + pp->function, pp->file, pp->line, pp->offset, pp->retprobe); +} + +/* Parse perf-probe event definition */ +int parse_perf_probe_event(const char *str, struct probe_point *pp) +{ + char *argv[MAX_PROBE_ARGS + 1]; /* probe + args */ + int argc, i, need_dwarf = 0; + + /* Separate arguments, similar to argv_split */ + argc = 0; + do { + /* Skip separators */ + while (isspace(*str)) + str++; + + /* Add an argument */ + if (*str != '\0') { + const char *s = str; + /* Check the limit number of arguments */ + if (argc == MAX_PROBE_ARGS + 1) + semantic_error("Too many arguments"); + + /* Skip the argument */ + while (!isspace(*str) && *str != '\0') + str++; + + /* Duplicate the argument */ + argv[argc] = strndup(s, str - s); + if (argv[argc] == NULL) + die("strndup"); + pr_debug("argv[%d]=%s\n", argc, argv[argc]); + argc++; + } + } while (*str != '\0'); + if (!argc) + semantic_error("An empty argument."); + + /* Parse probe point */ + parse_perf_probe_probepoint(argv[0], pp); + free(argv[0]); + if (pp->file || pp->line) + need_dwarf = 1; + + /* Copy arguments */ + pp->nr_args = argc - 1; + if (pp->nr_args > 0) { + pp->args = (char **)malloc(sizeof(char *) * pp->nr_args); + if (!pp->args) + die("malloc"); + memcpy(pp->args, &argv[1], sizeof(char *) * pp->nr_args); + } + + /* Ensure return probe has no C argument */ + for (i = 0; i < pp->nr_args; i++) + if (is_c_varname(pp->args[i])) { + if (pp->retprobe) + semantic_error("You can't specify local" + " variable for kretprobe"); + need_dwarf = 1; + } + + return need_dwarf; +} + +int synthesize_trace_kprobe_event(struct probe_point *pp) +{ + char *buf; + int i, len, ret; + + pp->probes[0] = buf = zalloc(MAX_CMDLEN); + if (!buf) + die("Failed to allocate memory by zalloc."); + ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); + if (ret <= 0 || ret >= MAX_CMDLEN) + goto error; + len = ret; + + for (i = 0; i < pp->nr_args; i++) { + ret = snprintf(&buf[len], MAX_CMDLEN - len, " %s", + pp->args[i]); + if (ret <= 0 || ret >= MAX_CMDLEN - len) + goto error; + len += ret; + } + pp->found = 1; + + return pp->found; +error: + free(pp->probes[0]); + if (ret > 0) + ret = -E2BIG; + + return ret; +} + +static int write_trace_kprobe_event(int fd, const char *buf) +{ + int ret; + + ret = write(fd, buf, strlen(buf)); + if (ret <= 0) + die("Failed to create event."); + else + printf("Added new event: %s\n", buf); + + return ret; +} + +void add_trace_kprobe_events(struct probe_point *probes, int nr_probes) +{ + int i, j, fd; + struct probe_point *pp; + char buf[MAX_CMDLEN]; + + snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); + fd = open(buf, O_WRONLY, O_APPEND); + if (fd < 0) { + if (errno == ENOENT) + die("kprobe_events file does not exist -" + " please rebuild with CONFIG_KPROBE_TRACER."); + else + die("Could not open kprobe_events file: %s", + strerror(errno)); + } + + for (j = 0; j < nr_probes; j++) { + pp = probes + j; + if (pp->found == 1) { + snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x %s\n", + pp->retprobe ? 'r' : 'p', PERFPROBE_GROUP, + pp->function, pp->offset, pp->probes[0]); + write_trace_kprobe_event(fd, buf); + } else + for (i = 0; i < pp->found; i++) { + snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x_%d %s\n", + pp->retprobe ? 'r' : 'p', + PERFPROBE_GROUP, + pp->function, pp->offset, i, + pp->probes[i]); + write_trace_kprobe_event(fd, buf); + } + } + close(fd); +} diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h new file mode 100644 index 0000000..0089c45 --- /dev/null +++ b/tools/perf/util/probe-event.h @@ -0,0 +1,10 @@ +#ifndef _PROBE_EVENT_H +#define _PROBE_EVENT_H + +#include "probe-finder.h" + +extern int parse_perf_probe_event(const char *str, struct probe_point *pp); +extern int synthesize_trace_kprobe_event(struct probe_point *pp); +extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes); + +#endif /*_PROBE_EVENT_H */ -- cgit v0.10.2 From e1c01d61a98703fcc80d15b8068ec36d5a215f7e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:20:05 -0500 Subject: perf probe: Add argv_split() from lib/argv_split.c Add argv_split() ported from lib/argv_split.c to string.c and use it in util/probe-event.c. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201002005.10235.55602.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 7335a3b..e3a683a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -32,6 +32,7 @@ #undef _GNU_SOURCE #include "event.h" +#include "string.h" #include "debug.h" #include "parse-events.h" /* For debugfs_path */ #include "probe-event.h" @@ -132,62 +133,36 @@ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) /* Parse perf-probe event definition */ int parse_perf_probe_event(const char *str, struct probe_point *pp) { - char *argv[MAX_PROBE_ARGS + 1]; /* probe + args */ + char **argv; int argc, i, need_dwarf = 0; - /* Separate arguments, similar to argv_split */ - argc = 0; - do { - /* Skip separators */ - while (isspace(*str)) - str++; - - /* Add an argument */ - if (*str != '\0') { - const char *s = str; - /* Check the limit number of arguments */ - if (argc == MAX_PROBE_ARGS + 1) - semantic_error("Too many arguments"); - - /* Skip the argument */ - while (!isspace(*str) && *str != '\0') - str++; - - /* Duplicate the argument */ - argv[argc] = strndup(s, str - s); - if (argv[argc] == NULL) - die("strndup"); - pr_debug("argv[%d]=%s\n", argc, argv[argc]); - argc++; - } - } while (*str != '\0'); - if (!argc) - semantic_error("An empty argument."); + argv = argv_split(str, &argc); + if (!argv) + die("argv_split failed."); + if (argc > MAX_PROBE_ARGS + 1) + semantic_error("Too many arguments"); /* Parse probe point */ parse_perf_probe_probepoint(argv[0], pp); - free(argv[0]); if (pp->file || pp->line) need_dwarf = 1; - /* Copy arguments */ + /* Copy arguments and ensure return probe has no C argument */ pp->nr_args = argc - 1; - if (pp->nr_args > 0) { - pp->args = (char **)malloc(sizeof(char *) * pp->nr_args); - if (!pp->args) - die("malloc"); - memcpy(pp->args, &argv[1], sizeof(char *) * pp->nr_args); - } - - /* Ensure return probe has no C argument */ - for (i = 0; i < pp->nr_args; i++) + pp->args = zalloc(sizeof(char *) * pp->nr_args); + for (i = 0; i < pp->nr_args; i++) { + pp->args[i] = strdup(argv[i + 1]); + if (!pp->args[i]) + die("Failed to copy argument."); if (is_c_varname(pp->args[i])) { if (pp->retprobe) semantic_error("You can't specify local" " variable for kretprobe"); need_dwarf = 1; } + } + argv_free(argv); return need_dwarf; } diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 2270435..0977cf4 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -127,3 +127,104 @@ out_err: out: return length; } + +/* + * Helper function for splitting a string into an argv-like array. + * originaly copied from lib/argv_split.c + */ +static const char *skip_sep(const char *cp) +{ + while (*cp && isspace(*cp)) + cp++; + + return cp; +} + +static const char *skip_arg(const char *cp) +{ + while (*cp && !isspace(*cp)) + cp++; + + return cp; +} + +static int count_argc(const char *str) +{ + int count = 0; + + while (*str) { + str = skip_sep(str); + if (*str) { + count++; + str = skip_arg(str); + } + } + + return count; +} + +/** + * argv_free - free an argv + * @argv - the argument vector to be freed + * + * Frees an argv and the strings it points to. + */ +void argv_free(char **argv) +{ + char **p; + for (p = argv; *p; p++) + free(*p); + + free(argv); +} + +/** + * argv_split - split a string at whitespace, returning an argv + * @str: the string to be split + * @argcp: returned argument count + * + * Returns an array of pointers to strings which are split out from + * @str. This is performed by strictly splitting on white-space; no + * quote processing is performed. Multiple whitespace characters are + * considered to be a single argument separator. The returned array + * is always NULL-terminated. Returns NULL on memory allocation + * failure. + */ +char **argv_split(const char *str, int *argcp) +{ + int argc = count_argc(str); + char **argv = zalloc(sizeof(*argv) * (argc+1)); + char **argvp; + + if (argv == NULL) + goto out; + + if (argcp) + *argcp = argc; + + argvp = argv; + + while (*str) { + str = skip_sep(str); + + if (*str) { + const char *p = str; + char *t; + + str = skip_arg(str); + + t = strndup(p, str-p); + if (t == NULL) + goto fail; + *argvp++ = t; + } + } + *argvp = NULL; + +out: + return argv; + +fail: + argv_free(argv); + return NULL; +} diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index e50b07f..bfecec2 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h @@ -6,6 +6,8 @@ int hex2u64(const char *ptr, u64 *val); char *strxfrchar(char *s, char from, char to); s64 perf_atoll(const char *str); +char **argv_split(const char *str, int *argcp); +void argv_free(char **argv); #define _STR(x) #x #define STR(x) _STR(x) -- cgit v0.10.2 From 4de189fe6e5ad8241f6f8709d2e2ba4c3aeae33a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:20:17 -0500 Subject: perf probe: Add --list option for listing current probe events Add --list option for listing currently defined probe events in the kernel. This shows events in below format; [group:event] for example: [probe:schedule_0] schedule+30 cpu Note that source file/line information is not supported yet. So even if you added a probe by line, it will be shown in . Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201002017.10235.76575.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index bf20df2..b5d15cf 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -62,6 +62,8 @@ static struct { struct probe_point probes[MAX_PROBES]; } session; +static bool listing; + /* Parse an event definition. Note that any error must die. */ static void parse_probe_event(const char *str) { @@ -119,6 +121,7 @@ static int open_default_vmlinux(void) static const char * const probe_usage[] = { "perf probe [] 'PROBEDEF' ['PROBEDEF' ...]", "perf probe [] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", + "perf probe --list", NULL }; @@ -129,6 +132,7 @@ static const struct option options[] = { OPT_STRING('k', "vmlinux", &session.vmlinux, "file", "vmlinux/module pathname"), #endif + OPT_BOOLEAN('l', "list", &listing, "list up current probes"), OPT_CALLBACK('a', "add", NULL, #ifdef NO_LIBDWARF "FUNC[+OFFS|%return] [ARG ...]", @@ -164,9 +168,15 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used) for (i = 0; i < argc; i++) parse_probe_event(argv[i]); - if (session.nr_probe == 0) + if ((session.nr_probe == 0 && !listing) || + (session.nr_probe != 0 && listing)) usage_with_options(probe_usage, options); + if (listing) { + show_perf_probe_events(); + return 0; + } + if (session.need_dwarf) #ifdef NO_LIBDWARF die("Debuginfo-analysis is not supported"); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e3a683a..7f4f288 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -29,10 +29,13 @@ #include #include #include +#include +#include #undef _GNU_SOURCE #include "event.h" #include "string.h" +#include "strlist.h" #include "debug.h" #include "parse-events.h" /* For debugfs_path */ #include "probe-event.h" @@ -43,6 +46,19 @@ #define semantic_error(msg ...) die("Semantic error :" msg) +/* If there is no space to write, returns -E2BIG. */ +static int e_snprintf(char *str, size_t size, const char *format, ...) +{ + int ret; + va_list ap; + va_start(ap, format); + ret = vsnprintf(str, size, format, ap); + va_end(ap); + if (ret >= (int)size) + ret = -E2BIG; + return ret; +} + /* Parse probepoint definition. */ static void parse_perf_probe_probepoint(char *arg, struct probe_point *pp) { @@ -166,6 +182,103 @@ int parse_perf_probe_event(const char *str, struct probe_point *pp) return need_dwarf; } +/* Parse kprobe_events event into struct probe_point */ +void parse_trace_kprobe_event(const char *str, char **group, char **event, + struct probe_point *pp) +{ + char pr; + char *p; + int ret, i, argc; + char **argv; + + pr_debug("Parsing kprobe_events: %s\n", str); + argv = argv_split(str, &argc); + if (!argv) + die("argv_split failed."); + if (argc < 2) + semantic_error("Too less arguments."); + + /* Scan event and group name. */ + ret = sscanf(argv[0], "%c:%m[^/ \t]/%m[^ \t]", + &pr, group, event); + if (ret != 3) + semantic_error("Failed to parse event name: %s", argv[0]); + pr_debug("Group:%s Event:%s probe:%c\n", *group, *event, pr); + + if (!pp) + goto end; + + pp->retprobe = (pr == 'r'); + + /* Scan function name and offset */ + ret = sscanf(argv[1], "%m[^+]+%d", &pp->function, &pp->offset); + if (ret == 1) + pp->offset = 0; + + /* kprobe_events doesn't have this information */ + pp->line = 0; + pp->file = NULL; + + pp->nr_args = argc - 2; + pp->args = zalloc(sizeof(char *) * pp->nr_args); + for (i = 0; i < pp->nr_args; i++) { + p = strchr(argv[i + 2], '='); + if (p) /* We don't need which register is assigned. */ + *p = '\0'; + pp->args[i] = strdup(argv[i + 2]); + if (!pp->args[i]) + die("Failed to copy argument."); + } + +end: + argv_free(argv); +} + +int synthesize_perf_probe_event(struct probe_point *pp) +{ + char *buf; + char offs[64] = "", line[64] = ""; + int i, len, ret; + + pp->probes[0] = buf = zalloc(MAX_CMDLEN); + if (!buf) + die("Failed to allocate memory by zalloc."); + if (pp->offset) { + ret = e_snprintf(offs, 64, "+%d", pp->offset); + if (ret <= 0) + goto error; + } + if (pp->line) { + ret = e_snprintf(line, 64, ":%d", pp->line); + if (ret <= 0) + goto error; + } + + if (pp->function) + ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->function, + offs, pp->retprobe ? "%return" : "", line); + else + ret = e_snprintf(buf, MAX_CMDLEN, "%s%s%s%s", pp->file, line); + if (ret <= 0) + goto error; + len = ret; + + for (i = 0; i < pp->nr_args; i++) { + ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", + pp->args[i]); + if (ret <= 0) + goto error; + len += ret; + } + pp->found = 1; + + return pp->found; +error: + free(pp->probes[0]); + + return ret; +} + int synthesize_trace_kprobe_event(struct probe_point *pp) { char *buf; @@ -174,15 +287,15 @@ int synthesize_trace_kprobe_event(struct probe_point *pp) pp->probes[0] = buf = zalloc(MAX_CMDLEN); if (!buf) die("Failed to allocate memory by zalloc."); - ret = snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); - if (ret <= 0 || ret >= MAX_CMDLEN) + ret = e_snprintf(buf, MAX_CMDLEN, "%s+%d", pp->function, pp->offset); + if (ret <= 0) goto error; len = ret; for (i = 0; i < pp->nr_args; i++) { - ret = snprintf(&buf[len], MAX_CMDLEN - len, " %s", - pp->args[i]); - if (ret <= 0 || ret >= MAX_CMDLEN - len) + ret = e_snprintf(&buf[len], MAX_CMDLEN - len, " %s", + pp->args[i]); + if (ret <= 0) goto error; len += ret; } @@ -191,12 +304,105 @@ int synthesize_trace_kprobe_event(struct probe_point *pp) return pp->found; error: free(pp->probes[0]); - if (ret > 0) - ret = -E2BIG; return ret; } +static int open_kprobe_events(int flags, int mode) +{ + char buf[PATH_MAX]; + int ret; + + ret = e_snprintf(buf, PATH_MAX, "%s/../kprobe_events", debugfs_path); + if (ret < 0) + die("Failed to make kprobe_events path."); + + ret = open(buf, flags, mode); + if (ret < 0) { + if (errno == ENOENT) + die("kprobe_events file does not exist -" + " please rebuild with CONFIG_KPROBE_TRACER."); + else + die("Could not open kprobe_events file: %s", + strerror(errno)); + } + return ret; +} + +/* Get raw string list of current kprobe_events */ +static struct strlist *get_trace_kprobe_event_rawlist(int fd) +{ + int ret, idx; + FILE *fp; + char buf[MAX_CMDLEN]; + char *p; + struct strlist *sl; + + sl = strlist__new(true, NULL); + + fp = fdopen(dup(fd), "r"); + while (!feof(fp)) { + p = fgets(buf, MAX_CMDLEN, fp); + if (!p) + break; + + idx = strlen(p) - 1; + if (p[idx] == '\n') + p[idx] = '\0'; + ret = strlist__add(sl, buf); + if (ret < 0) + die("strlist__add failed: %s", strerror(-ret)); + } + fclose(fp); + + return sl; +} + +/* Free and zero clear probe_point */ +static void clear_probe_point(struct probe_point *pp) +{ + int i; + + if (pp->function) + free(pp->function); + if (pp->file) + free(pp->file); + for (i = 0; i < pp->nr_args; i++) + free(pp->args[i]); + if (pp->args) + free(pp->args); + for (i = 0; i < pp->found; i++) + free(pp->probes[i]); + memset(pp, 0, sizeof(pp)); +} + +/* List up current perf-probe events */ +void show_perf_probe_events(void) +{ + unsigned int i; + int fd; + char *group, *event; + struct probe_point pp; + struct strlist *rawlist; + struct str_node *ent; + + fd = open_kprobe_events(O_RDONLY, 0); + rawlist = get_trace_kprobe_event_rawlist(fd); + close(fd); + + for (i = 0; i < strlist__nr_entries(rawlist); i++) { + ent = strlist__entry(rawlist, i); + parse_trace_kprobe_event(ent->s, &group, &event, &pp); + synthesize_perf_probe_event(&pp); + printf("[%s:%s]\t%s\n", group, event, pp.probes[0]); + free(group); + free(event); + clear_probe_point(&pp); + } + + strlist__delete(rawlist); +} + static int write_trace_kprobe_event(int fd, const char *buf) { int ret; @@ -216,16 +422,7 @@ void add_trace_kprobe_events(struct probe_point *probes, int nr_probes) struct probe_point *pp; char buf[MAX_CMDLEN]; - snprintf(buf, MAX_CMDLEN, "%s/../kprobe_events", debugfs_path); - fd = open(buf, O_WRONLY, O_APPEND); - if (fd < 0) { - if (errno == ENOENT) - die("kprobe_events file does not exist -" - " please rebuild with CONFIG_KPROBE_TRACER."); - else - die("Could not open kprobe_events file: %s", - strerror(errno)); - } + fd = open_kprobe_events(O_WRONLY, O_APPEND); for (j = 0; j < nr_probes; j++) { pp = probes + j; diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 0089c45..88db7d1 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -2,9 +2,14 @@ #define _PROBE_EVENT_H #include "probe-finder.h" +#include "strlist.h" extern int parse_perf_probe_event(const char *str, struct probe_point *pp); +extern int synthesize_perf_probe_event(struct probe_point *pp); +extern void parse_trace_kprobe_event(const char *str, char **group, + char **event, struct probe_point *pp); extern int synthesize_trace_kprobe_event(struct probe_point *pp); extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes); +extern void show_perf_probe_events(void); #endif /*_PROBE_EVENT_H */ -- cgit v0.10.2 From b498ce1f2753b9724b2fc05d2057f7d1490cfa93 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:20:25 -0500 Subject: perf probe: Simplify event naming Simplify event naming as _. Each event name is globally unique (group name is not checked). So, if there is schedule_0, next probe event on schedule() will be schedule_1. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <20091201002024.10235.2353.stgit@harusame> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 7f4f288..e42f3ac 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -403,6 +403,29 @@ void show_perf_probe_events(void) strlist__delete(rawlist); } +/* Get current perf-probe event names */ +static struct strlist *get_perf_event_names(int fd) +{ + unsigned int i; + char *group, *event; + struct strlist *sl, *rawlist; + struct str_node *ent; + + rawlist = get_trace_kprobe_event_rawlist(fd); + + sl = strlist__new(false, NULL); + for (i = 0; i < strlist__nr_entries(rawlist); i++) { + ent = strlist__entry(rawlist, i); + parse_trace_kprobe_event(ent->s, &group, &event, NULL); + strlist__add(sl, event); + free(group); + } + + strlist__delete(rawlist); + + return sl; +} + static int write_trace_kprobe_event(int fd, const char *buf) { int ret; @@ -416,30 +439,46 @@ static int write_trace_kprobe_event(int fd, const char *buf) return ret; } +static void get_new_event_name(char *buf, size_t len, const char *base, + struct strlist *namelist) +{ + int i, ret; + for (i = 0; i < MAX_EVENT_INDEX; i++) { + ret = e_snprintf(buf, len, "%s_%d", base, i); + if (ret < 0) + die("snprintf() failed: %s", strerror(-ret)); + if (!strlist__has_entry(namelist, buf)) + break; + } + if (i == MAX_EVENT_INDEX) + die("Too many events are on the same function."); +} + void add_trace_kprobe_events(struct probe_point *probes, int nr_probes) { int i, j, fd; struct probe_point *pp; char buf[MAX_CMDLEN]; + char event[64]; + struct strlist *namelist; - fd = open_kprobe_events(O_WRONLY, O_APPEND); + fd = open_kprobe_events(O_RDWR, O_APPEND); + /* Get current event names */ + namelist = get_perf_event_names(fd); for (j = 0; j < nr_probes; j++) { pp = probes + j; - if (pp->found == 1) { - snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x %s\n", - pp->retprobe ? 'r' : 'p', PERFPROBE_GROUP, - pp->function, pp->offset, pp->probes[0]); + for (i = 0; i < pp->found; i++) { + /* Get an unused new event name */ + get_new_event_name(event, 64, pp->function, namelist); + snprintf(buf, MAX_CMDLEN, "%c:%s/%s %s\n", + pp->retprobe ? 'r' : 'p', + PERFPROBE_GROUP, event, + pp->probes[i]); write_trace_kprobe_event(fd, buf); - } else - for (i = 0; i < pp->found; i++) { - snprintf(buf, MAX_CMDLEN, "%c:%s/%s_%x_%d %s\n", - pp->retprobe ? 'r' : 'p', - PERFPROBE_GROUP, - pp->function, pp->offset, i, - pp->probes[i]); - write_trace_kprobe_event(fd, buf); - } + /* Add added event name to namelist */ + strlist__add(namelist, event); + } } close(fd); } diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 88db7d1..0c6fe56 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -12,4 +12,7 @@ extern int synthesize_trace_kprobe_event(struct probe_point *pp); extern void add_trace_kprobe_events(struct probe_point *probes, int nr_probes); extern void show_perf_probe_events(void); +/* Maximum index number of event-name postfix */ +#define MAX_EVENT_INDEX 1024 + #endif /*_PROBE_EVENT_H */ -- cgit v0.10.2 From 59d069eb5ae9b033ed1c124c92e1532c4a958991 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 1 Dec 2009 17:30:08 +0800 Subject: perf_event: Initialize data.period in perf_swevent_hrtimer() In current code in perf_swevent_hrtimer(), data.period is not initialized, The result is obvious wrong: # ./perf record -f -e cpu-clock make # ./perf report # Samples: 1740 # # Overhead Command ...... # ........ ........ .......................................... # 1025422183050275328.00% sh libc-2.9.90.so ... 1025422183050275328.00% perl libperl.so ... 1025422168240043264.00% perl [kernel] ... 1025422030011210752.00% perl [kernel] ... Signed-off-by: Xiao Guangrong Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: LKML-Reference: <4B14E220.2050107@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 0b9ca2d..040ee51 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4010,6 +4010,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) event->pmu->read(event); data.addr = 0; + data.period = event->hw.last_period; regs = get_irq_regs(); /* * In case we exclude kernel IPs or are somehow not in interrupt -- cgit v0.10.2 From 7716977b6ae5a0cdd0afab5c6035c4d0ce53f599 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 30 Nov 2009 13:24:18 +0000 Subject: mfd: Correct WM831X_MAX_ISEL_VALUE There was confusion between the array size and the highest ISEL value possible. Reported-by: Dan Carpenter Signed-off-by: Mark Brown Signed-off-by: Samuel Ortiz diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c index 49b7885..7f27576 100644 --- a/drivers/mfd/wm831x-core.c +++ b/drivers/mfd/wm831x-core.c @@ -29,7 +29,7 @@ /* Current settings - values are 2*2^(reg_val/4) microamps. These are * exported since they are used by multiple drivers. */ -int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL] = { +int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1] = { 2, 2, 3, diff --git a/include/linux/mfd/wm831x/regulator.h b/include/linux/mfd/wm831x/regulator.h index f954663..955d30f 100644 --- a/include/linux/mfd/wm831x/regulator.h +++ b/include/linux/mfd/wm831x/regulator.h @@ -1212,7 +1212,7 @@ #define WM831X_LDO1_OK_SHIFT 0 /* LDO1_OK */ #define WM831X_LDO1_OK_WIDTH 1 /* LDO1_OK */ -#define WM831X_ISINK_MAX_ISEL 56 -extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL]; +#define WM831X_ISINK_MAX_ISEL 55 +extern int wm831x_isinkv_values[WM831X_ISINK_MAX_ISEL + 1]; #endif -- cgit v0.10.2 From 6f054164322bc6c1233402b9ed6b40d4af39a98f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 13:38:45 +0000 Subject: 9p: fix build breakage introduced by FS-Cache While building 2.6.32-rc8-git2 for Fedora I noticed the following thinko in commit 201a15428bd54f83eccec8b7c64a04b8f9431204 ("FS-Cache: Handle pages pending storage that get evicted under OOM conditions"): fs/9p/cache.c: In function '__v9fs_fscache_release_page': fs/9p/cache.c:346: error: 'vnode' undeclared (first use in this function) fs/9p/cache.c:346: error: (Each undeclared identifier is reported only once fs/9p/cache.c:346: error: for each function it appears in.) make[2]: *** [fs/9p/cache.o] Error 1 Fix the 9P filesystem to correctly construct the argument to fscache_maybe_release_page(). Signed-off-by: Kyle McMartin Signed-off-by: Xiaotian Feng [from identical patch] Signed-off-by: Stefan Lippers-Hollmann [from identical patch] Signed-off-by: David Howells Signed-off-by: Linus Torvalds diff --git a/fs/9p/cache.c b/fs/9p/cache.c index bcc5357..e777961 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -343,7 +343,7 @@ int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) BUG_ON(!vcookie->fscache); - return fscache_maybe_release_page(vnode->cache, page, gfp); + return fscache_maybe_release_page(vcookie->fscache, page, gfp); } void __v9fs_fscache_invalidate_page(struct page *page) -- cgit v0.10.2 From fa1dae4906982b5d896c07613b1fe42456133b1c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 13:52:08 +0000 Subject: SLOW_WORK: Fix the CONFIG_MODULES=n case Commits 3d7a641 ("SLOW_WORK: Wait for outstanding work items belonging to a module to clear") introduced some code to make sure that all of a module's slow-work items were complete before that module was removed, and commit 3bde31a ("SLOW_WORK: Allow a requeueable work item to sleep till the thread is needed") further extended that, breaking it in the process if CONFIG_MODULES=n: CC kernel/slow-work.o kernel/slow-work.c: In function 'slow_work_execute': kernel/slow-work.c:313: error: 'slow_work_thread_processing' undeclared (first use in this function) kernel/slow-work.c:313: error: (Each undeclared identifier is reported only once kernel/slow-work.c:313: error: for each function it appears in.) kernel/slow-work.c: In function 'slow_work_wait_for_items': kernel/slow-work.c:950: error: 'slow_work_unreg_sync_lock' undeclared (first use in this function) kernel/slow-work.c:951: error: 'slow_work_unreg_wq' undeclared (first use in this function) kernel/slow-work.c:961: error: 'slow_work_unreg_work_item' undeclared (first use in this function) kernel/slow-work.c:974: error: 'slow_work_unreg_module' undeclared (first use in this function) kernel/slow-work.c:977: error: 'slow_work_thread_processing' undeclared (first use in this function) make[1]: *** [kernel/slow-work.o] Error 1 Fix this by: (1) Extracting the bits of slow_work_execute() that are contingent on CONFIG_MODULES, and the bits that should be, into inline functions and placing them into the #ifdef'd section that defines the relevant variables and adding stubs for moduleless kernels. This allows the removal of some #ifdefs. (2) #ifdef'ing out the contents of slow_work_wait_for_items() in moduleless kernels. The four functions related to handling module unloading synchronisation (and their associated variables) could be offloaded into a separate .c file, but each function is only used once and three of them are tiny, so doing so would prevent them from being inlined. Reported-by: Ingo Molnar Signed-off-by: David Howells Signed-off-by: Linus Torvalds diff --git a/kernel/slow-work.c b/kernel/slow-work.c index da94f3c..b5c17f1 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -109,6 +109,30 @@ static struct module *slow_work_unreg_module; static struct slow_work *slow_work_unreg_work_item; static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq); static DEFINE_MUTEX(slow_work_unreg_sync_lock); + +static void slow_work_set_thread_processing(int id, struct slow_work *work) +{ + if (work) + slow_work_thread_processing[id] = work->owner; +} +static void slow_work_done_thread_processing(int id, struct slow_work *work) +{ + struct module *module = slow_work_thread_processing[id]; + + slow_work_thread_processing[id] = NULL; + smp_mb(); + if (slow_work_unreg_work_item == work || + slow_work_unreg_module == module) + wake_up_all(&slow_work_unreg_wq); +} +static void slow_work_clear_thread_processing(int id) +{ + slow_work_thread_processing[id] = NULL; +} +#else +static void slow_work_set_thread_processing(int id, struct slow_work *work) {} +static void slow_work_done_thread_processing(int id, struct slow_work *work) {} +static void slow_work_clear_thread_processing(int id) {} #endif /* @@ -197,9 +221,6 @@ static unsigned slow_work_calc_vsmax(void) */ static noinline bool slow_work_execute(int id) { -#ifdef CONFIG_MODULES - struct module *module; -#endif struct slow_work *work = NULL; unsigned vsmax; bool very_slow; @@ -236,10 +257,7 @@ static noinline bool slow_work_execute(int id) very_slow = false; /* avoid the compiler warning */ } -#ifdef CONFIG_MODULES - if (work) - slow_work_thread_processing[id] = work->owner; -#endif + slow_work_set_thread_processing(id, work); if (work) { slow_work_mark_time(work); slow_work_begin_exec(id, work); @@ -287,15 +305,7 @@ static noinline bool slow_work_execute(int id) /* sort out the race between module unloading and put_ref() */ slow_work_put_ref(work); - -#ifdef CONFIG_MODULES - module = slow_work_thread_processing[id]; - slow_work_thread_processing[id] = NULL; - smp_mb(); - if (slow_work_unreg_work_item == work || - slow_work_unreg_module == module) - wake_up_all(&slow_work_unreg_wq); -#endif + slow_work_done_thread_processing(id, work); return true; @@ -310,7 +320,7 @@ auto_requeue: else list_add_tail(&work->link, &slow_work_queue); spin_unlock_irq(&slow_work_queue_lock); - slow_work_thread_processing[id] = NULL; + slow_work_clear_thread_processing(id); return true; } @@ -943,6 +953,7 @@ EXPORT_SYMBOL(slow_work_register_user); */ static void slow_work_wait_for_items(struct module *module) { +#ifdef CONFIG_MODULES DECLARE_WAITQUEUE(myself, current); struct slow_work *work; int loop; @@ -989,6 +1000,7 @@ static void slow_work_wait_for_items(struct module *module) remove_wait_queue(&slow_work_unreg_wq, &myself); mutex_unlock(&slow_work_unreg_sync_lock); +#endif /* CONFIG_MODULES */ } /** -- cgit v0.10.2 From 3350b2acdd39d23db52710045536b943fe38a35c Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Tue, 1 Dec 2009 14:09:24 +0000 Subject: CacheFiles: Update IMA counters when using dentry_open When IMA is active, using dentry_open without updating the IMA counters will result in free/open imbalance errors when fput is eventually called. Signed-off-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: Linus Torvalds diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 1d83325..a6c8c6f 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -11,6 +11,7 @@ #include #include +#include #include "internal.h" /* @@ -922,6 +923,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) if (IS_ERR(file)) { ret = PTR_ERR(file); } else { + ima_counts_get(file); ret = -EIO; if (file->f_op->write) { pos = (loff_t) page->index << PAGE_SHIFT; -- cgit v0.10.2 From f13a48bd798a159291ca583b95453171b88b7448 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 1 Dec 2009 15:36:11 +0000 Subject: SLOW_WORK: Move slow_work's proc file to debugfs Move slow_work's debugging proc file to debugfs. Signed-off-by: David Howells Requested-and-acked-by: Ingo Molnar Signed-off-by: Linus Torvalds diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt index 52bc314..9dbf4470c 100644 --- a/Documentation/slow-work.txt +++ b/Documentation/slow-work.txt @@ -279,9 +279,9 @@ The slow-work thread pool has a number of configurables: VIEWING EXECUTING AND QUEUED ITEMS ================================== -If CONFIG_SLOW_WORK_PROC is enabled, a proc file is made available: +If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: - /proc/slow_work_rq + /sys/kernel/debug/slow_work/runqueue through which the list of work items being executed and the queues of items to be executed may be viewed. The owner of a work item is given the chance to diff --git a/include/linux/slow-work.h b/include/linux/slow-work.h index 5035a26..13337bf 100644 --- a/include/linux/slow-work.h +++ b/include/linux/slow-work.h @@ -20,7 +20,7 @@ #include struct slow_work; -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct seq_file; #endif @@ -42,8 +42,8 @@ struct slow_work_ops { /* execute a work item */ void (*execute)(struct slow_work *work); -#ifdef CONFIG_SLOW_WORK_PROC - /* describe a work item for /proc */ +#ifdef CONFIG_SLOW_WORK_DEBUG + /* describe a work item for debugfs */ void (*desc)(struct slow_work *work, struct seq_file *m); #endif }; @@ -64,7 +64,7 @@ struct slow_work { #define SLOW_WORK_DELAYED 5 /* item is struct delayed_slow_work with active timer */ const struct slow_work_ops *ops; /* operations table for this item */ struct list_head link; /* link in queue */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct timespec mark; /* jiffies at which queued or exec begun */ #endif }; diff --git a/init/Kconfig b/init/Kconfig index ab5c648..39923cc 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1098,12 +1098,12 @@ config SLOW_WORK See Documentation/slow-work.txt. -config SLOW_WORK_PROC - bool "Slow work debugging through /proc" +config SLOW_WORK_DEBUG + bool "Slow work debugging through debugfs" default n - depends on SLOW_WORK && PROC_FS + depends on SLOW_WORK && DEBUG_FS help - Display the contents of the slow work run queue through /proc, + Display the contents of the slow work run queue through debugfs, including items currently executing. See Documentation/slow-work.txt. diff --git a/kernel/Makefile b/kernel/Makefile index 776ffed..d7c13d2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -94,7 +94,7 @@ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o -obj-$(CONFIG_SLOW_WORK_PROC) += slow-work-proc.o +obj-$(CONFIG_SLOW_WORK_DEBUG) += slow-work-debugfs.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) diff --git a/kernel/slow-work-debugfs.c b/kernel/slow-work-debugfs.c new file mode 100644 index 0000000..e45c436 --- /dev/null +++ b/kernel/slow-work-debugfs.c @@ -0,0 +1,227 @@ +/* Slow work debugging + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include "slow-work.h" + +#define ITERATOR_SHIFT (BITS_PER_LONG - 4) +#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) +#define ITERATOR_COUNTER (~ITERATOR_SELECTOR) + +void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) +{ + seq_puts(m, "Slow-work: New thread"); +} + +/* + * Render the time mark field on a work item into a 5-char time with units plus + * a space + */ +static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) +{ + struct timespec now, diff; + + now = CURRENT_TIME; + diff = timespec_sub(now, work->mark); + + if (diff.tv_sec < 0) + seq_puts(m, " -ve "); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) + seq_printf(m, "%3luns ", diff.tv_nsec); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) + seq_printf(m, "%3luus ", diff.tv_nsec / 1000); + else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) + seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); + else if (diff.tv_sec <= 1) + seq_puts(m, " 1s "); + else if (diff.tv_sec < 60) + seq_printf(m, "%4lus ", diff.tv_sec); + else if (diff.tv_sec < 60 * 60) + seq_printf(m, "%4lum ", diff.tv_sec / 60); + else if (diff.tv_sec < 60 * 60 * 24) + seq_printf(m, "%4luh ", diff.tv_sec / 3600); + else + seq_puts(m, "exces "); +} + +/* + * Describe a slow work item for debugfs + */ +static int slow_work_runqueue_show(struct seq_file *m, void *v) +{ + struct slow_work *work; + struct list_head *p = v; + unsigned long id; + + switch ((unsigned long) v) { + case 1: + seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); + return 0; + case 2: + seq_puts(m, "=== ===== ================ == ===== ==========\n"); + return 0; + + case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: + id = (unsigned long) v - 3; + + read_lock(&slow_work_execs_lock); + work = slow_work_execs[id]; + if (work) { + smp_read_barrier_depends(); + + seq_printf(m, "%3lu %5d %16p %2lx ", + id, slow_work_pids[id], work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + } + read_unlock(&slow_work_execs_lock); + return 0; + + default: + work = list_entry(p, struct slow_work, link); + seq_printf(m, "%3s - %16p %2lx ", + work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", + work, work->flags); + slow_work_print_mark(m, work); + + if (work->ops->desc) + work->ops->desc(work, m); + seq_putc(m, '\n'); + return 0; + } +} + +/* + * map the iterator to a work item + */ +static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) +{ + struct list_head *p; + unsigned long count, id; + + switch (*_pos >> ITERATOR_SHIFT) { + case 0x0: + if (*_pos == 0) + *_pos = 1; + if (*_pos < 3) + return (void *)(unsigned long) *_pos; + if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) + for (id = *_pos - 3; + id < SLOW_WORK_THREAD_LIMIT; + id++, (*_pos)++) + if (slow_work_execs[id]) + return (void *)(unsigned long) *_pos; + *_pos = 0x1UL << ITERATOR_SHIFT; + + case 0x1: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &slow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + + case 0x2: + count = *_pos & ITERATOR_COUNTER; + list_for_each(p, &vslow_work_queue) { + if (count == 0) + return p; + count--; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * set up the iterator to start reading from the first line + */ +static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) +{ + spin_lock_irq(&slow_work_queue_lock); + return slow_work_runqueue_index(m, _pos); +} + +/* + * move to the next line + */ +static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) +{ + struct list_head *p = v; + unsigned long selector = *_pos >> ITERATOR_SHIFT; + + (*_pos)++; + switch (selector) { + case 0x0: + return slow_work_runqueue_index(m, _pos); + + case 0x1: + if (*_pos >> ITERATOR_SHIFT == 0x1) { + p = p->next; + if (p != &slow_work_queue) + return p; + } + *_pos = 0x2UL << ITERATOR_SHIFT; + p = &vslow_work_queue; + + case 0x2: + if (*_pos >> ITERATOR_SHIFT == 0x2) { + p = p->next; + if (p != &vslow_work_queue) + return p; + } + *_pos = 0x3UL << ITERATOR_SHIFT; + + default: + return NULL; + } +} + +/* + * clean up after reading + */ +static void slow_work_runqueue_stop(struct seq_file *m, void *v) +{ + spin_unlock_irq(&slow_work_queue_lock); +} + +static const struct seq_operations slow_work_runqueue_ops = { + .start = slow_work_runqueue_start, + .stop = slow_work_runqueue_stop, + .next = slow_work_runqueue_next, + .show = slow_work_runqueue_show, +}; + +/* + * open "/sys/kernel/debug/slow_work/runqueue" to list queue contents + */ +static int slow_work_runqueue_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &slow_work_runqueue_ops); +} + +const struct file_operations slow_work_runqueue_fops = { + .owner = THIS_MODULE, + .open = slow_work_runqueue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; diff --git a/kernel/slow-work-proc.c b/kernel/slow-work-proc.c deleted file mode 100644 index 3988032..0000000 --- a/kernel/slow-work-proc.c +++ /dev/null @@ -1,227 +0,0 @@ -/* Slow work debugging - * - * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public Licence - * as published by the Free Software Foundation; either version - * 2 of the Licence, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include "slow-work.h" - -#define ITERATOR_SHIFT (BITS_PER_LONG - 4) -#define ITERATOR_SELECTOR (0xfUL << ITERATOR_SHIFT) -#define ITERATOR_COUNTER (~ITERATOR_SELECTOR) - -void slow_work_new_thread_desc(struct slow_work *work, struct seq_file *m) -{ - seq_puts(m, "Slow-work: New thread"); -} - -/* - * Render the time mark field on a work item into a 5-char time with units plus - * a space - */ -static void slow_work_print_mark(struct seq_file *m, struct slow_work *work) -{ - struct timespec now, diff; - - now = CURRENT_TIME; - diff = timespec_sub(now, work->mark); - - if (diff.tv_sec < 0) - seq_puts(m, " -ve "); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000) - seq_printf(m, "%3luns ", diff.tv_nsec); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000) - seq_printf(m, "%3luus ", diff.tv_nsec / 1000); - else if (diff.tv_sec == 0 && diff.tv_nsec < 1000000000) - seq_printf(m, "%3lums ", diff.tv_nsec / 1000000); - else if (diff.tv_sec <= 1) - seq_puts(m, " 1s "); - else if (diff.tv_sec < 60) - seq_printf(m, "%4lus ", diff.tv_sec); - else if (diff.tv_sec < 60 * 60) - seq_printf(m, "%4lum ", diff.tv_sec / 60); - else if (diff.tv_sec < 60 * 60 * 24) - seq_printf(m, "%4luh ", diff.tv_sec / 3600); - else - seq_puts(m, "exces "); -} - -/* - * Describe a slow work item for /proc - */ -static int slow_work_runqueue_show(struct seq_file *m, void *v) -{ - struct slow_work *work; - struct list_head *p = v; - unsigned long id; - - switch ((unsigned long) v) { - case 1: - seq_puts(m, "THR PID ITEM ADDR FL MARK DESC\n"); - return 0; - case 2: - seq_puts(m, "=== ===== ================ == ===== ==========\n"); - return 0; - - case 3 ... 3 + SLOW_WORK_THREAD_LIMIT - 1: - id = (unsigned long) v - 3; - - read_lock(&slow_work_execs_lock); - work = slow_work_execs[id]; - if (work) { - smp_read_barrier_depends(); - - seq_printf(m, "%3lu %5d %16p %2lx ", - id, slow_work_pids[id], work, work->flags); - slow_work_print_mark(m, work); - - if (work->ops->desc) - work->ops->desc(work, m); - seq_putc(m, '\n'); - } - read_unlock(&slow_work_execs_lock); - return 0; - - default: - work = list_entry(p, struct slow_work, link); - seq_printf(m, "%3s - %16p %2lx ", - work->flags & SLOW_WORK_VERY_SLOW ? "vsq" : "sq", - work, work->flags); - slow_work_print_mark(m, work); - - if (work->ops->desc) - work->ops->desc(work, m); - seq_putc(m, '\n'); - return 0; - } -} - -/* - * map the iterator to a work item - */ -static void *slow_work_runqueue_index(struct seq_file *m, loff_t *_pos) -{ - struct list_head *p; - unsigned long count, id; - - switch (*_pos >> ITERATOR_SHIFT) { - case 0x0: - if (*_pos == 0) - *_pos = 1; - if (*_pos < 3) - return (void *)(unsigned long) *_pos; - if (*_pos < 3 + SLOW_WORK_THREAD_LIMIT) - for (id = *_pos - 3; - id < SLOW_WORK_THREAD_LIMIT; - id++, (*_pos)++) - if (slow_work_execs[id]) - return (void *)(unsigned long) *_pos; - *_pos = 0x1UL << ITERATOR_SHIFT; - - case 0x1: - count = *_pos & ITERATOR_COUNTER; - list_for_each(p, &slow_work_queue) { - if (count == 0) - return p; - count--; - } - *_pos = 0x2UL << ITERATOR_SHIFT; - - case 0x2: - count = *_pos & ITERATOR_COUNTER; - list_for_each(p, &vslow_work_queue) { - if (count == 0) - return p; - count--; - } - *_pos = 0x3UL << ITERATOR_SHIFT; - - default: - return NULL; - } -} - -/* - * set up the iterator to start reading from the first line - */ -static void *slow_work_runqueue_start(struct seq_file *m, loff_t *_pos) -{ - spin_lock_irq(&slow_work_queue_lock); - return slow_work_runqueue_index(m, _pos); -} - -/* - * move to the next line - */ -static void *slow_work_runqueue_next(struct seq_file *m, void *v, loff_t *_pos) -{ - struct list_head *p = v; - unsigned long selector = *_pos >> ITERATOR_SHIFT; - - (*_pos)++; - switch (selector) { - case 0x0: - return slow_work_runqueue_index(m, _pos); - - case 0x1: - if (*_pos >> ITERATOR_SHIFT == 0x1) { - p = p->next; - if (p != &slow_work_queue) - return p; - } - *_pos = 0x2UL << ITERATOR_SHIFT; - p = &vslow_work_queue; - - case 0x2: - if (*_pos >> ITERATOR_SHIFT == 0x2) { - p = p->next; - if (p != &vslow_work_queue) - return p; - } - *_pos = 0x3UL << ITERATOR_SHIFT; - - default: - return NULL; - } -} - -/* - * clean up after reading - */ -static void slow_work_runqueue_stop(struct seq_file *m, void *v) -{ - spin_unlock_irq(&slow_work_queue_lock); -} - -static const struct seq_operations slow_work_runqueue_ops = { - .start = slow_work_runqueue_start, - .stop = slow_work_runqueue_stop, - .next = slow_work_runqueue_next, - .show = slow_work_runqueue_show, -}; - -/* - * open "/proc/slow_work_rq" to list queue contents - */ -static int slow_work_runqueue_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &slow_work_runqueue_ops); -} - -const struct file_operations slow_work_runqueue_fops = { - .owner = THIS_MODULE, - .open = slow_work_runqueue_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; diff --git a/kernel/slow-work.c b/kernel/slow-work.c index b5c17f1..00889bd 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include "slow-work.h" static void slow_work_cull_timeout(unsigned long); @@ -138,7 +138,7 @@ static void slow_work_clear_thread_processing(int id) {} /* * Data for tracking currently executing items for indication through /proc */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT]; pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT]; DEFINE_RWLOCK(slow_work_execs_lock); @@ -823,7 +823,7 @@ static void slow_work_new_thread_execute(struct slow_work *work) static const struct slow_work_ops slow_work_new_thread_ops = { .owner = THIS_MODULE, .execute = slow_work_new_thread_execute, -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG .desc = slow_work_new_thread_desc, #endif }; @@ -1055,9 +1055,15 @@ static int __init init_slow_work(void) if (slow_work_max_max_threads < nr_cpus * 2) slow_work_max_max_threads = nr_cpus * 2; #endif -#ifdef CONFIG_SLOW_WORK_PROC - proc_create("slow_work_rq", S_IFREG | 0400, NULL, - &slow_work_runqueue_fops); +#ifdef CONFIG_SLOW_WORK_DEBUG + { + struct dentry *dbdir; + + dbdir = debugfs_create_dir("slow_work", NULL); + if (dbdir && !IS_ERR(dbdir)) + debugfs_create_file("runqueue", S_IFREG | 0400, dbdir, + NULL, &slow_work_runqueue_fops); + } #endif return 0; } diff --git a/kernel/slow-work.h b/kernel/slow-work.h index 3c2f007..321f3c5 100644 --- a/kernel/slow-work.h +++ b/kernel/slow-work.h @@ -19,7 +19,7 @@ /* * slow-work.c */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG extern struct slow_work *slow_work_execs[]; extern pid_t slow_work_pids[]; extern rwlock_t slow_work_execs_lock; @@ -30,9 +30,9 @@ extern struct list_head vslow_work_queue; extern spinlock_t slow_work_queue_lock; /* - * slow-work-proc.c + * slow-work-debugfs.c */ -#ifdef CONFIG_SLOW_WORK_PROC +#ifdef CONFIG_SLOW_WORK_DEBUG extern const struct file_operations slow_work_runqueue_fops; extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *); -- cgit v0.10.2 From 2b5e63f6b8f6566161a261a9face1de433d6608e Mon Sep 17 00:00:00 2001 From: Martin Michlmayr Date: Thu, 19 Nov 2009 16:40:09 +0000 Subject: MIPS: IP22/IP28 Disable early printk to fix boot problems on some systems. Some Debian users have reported that the kernel hangs early during boot on some IP22 systems. Thomas Bogendoerfer found that this is due to a "bad interaction between CONFIG_EARLY_PRINTK and overwritten prom memory during early boot". Since there's no fix yet, disable CONFIG_EARLY_PRINTK for now. Signed-off-by: Martin Michlmayr Cc: linux-mips@linux-mips.org Cc: Thomas Bogendoerfer Cc: Dmitri Vorobiev Patchwork: http://patchwork.linux-mips.org/patch/702/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1aad0d9..ffdd651 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -358,7 +358,14 @@ config SGI_IP22 select SWAP_IO_SPACE select SYS_HAS_CPU_R4X00 select SYS_HAS_CPU_R5000 - select SYS_HAS_EARLY_PRINTK + # + # Disable EARLY_PRINTK for now since it leads to overwritten prom + # memory during early boot on some machines. + # + # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com + # for a more details discussion + # + # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN @@ -410,7 +417,14 @@ config SGI_IP28 select SGI_HAS_ZILOG select SWAP_IO_SPACE select SYS_HAS_CPU_R10000 - select SYS_HAS_EARLY_PRINTK + # + # Disable EARLY_PRINTK for now since it leads to overwritten prom + # memory during early boot on some machines. + # + # See http://www.linux-mips.org/cgi-bin/mesg.cgi?a=linux-mips&i=20091119164009.GA15038%40deprecation.cyrius.com + # for a more details discussion + # + # select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN help -- cgit v0.10.2 From c677189af9faa3f26fae0fcb7ac59f477048b89a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 24 Nov 2009 13:16:02 +0000 Subject: MIPS: Fix build error if __xchg() is not getting inlined. If __xchg() is not getting inlined the outline version of the function will have a reference to __xchg_called_with_bad_pointer() which does not exist remaining. Fixed by using BUILD_BUG_ON() to check for allowable operand sizes. Signed-off-by: Ralf Baechle Patchwork: http://patchwork.linux-mips.org/patch/705/ diff --git a/arch/mips/include/asm/system.h b/arch/mips/include/asm/system.h index fcf5f98..83b5509 100644 --- a/arch/mips/include/asm/system.h +++ b/arch/mips/include/asm/system.h @@ -12,6 +12,7 @@ #ifndef _ASM_SYSTEM_H #define _ASM_SYSTEM_H +#include #include #include @@ -193,10 +194,6 @@ extern __u64 __xchg_u64_unsupported_on_32bit_kernels(volatile __u64 * m, __u64 v #define __xchg_u64 __xchg_u64_unsupported_on_32bit_kernels #endif -/* This function doesn't exist, so you'll get a linker error - if something tries to do an invalid xchg(). */ -extern void __xchg_called_with_bad_pointer(void); - static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size) { switch (size) { @@ -205,11 +202,17 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz case 8: return __xchg_u64(ptr, x); } - __xchg_called_with_bad_pointer(); + return x; } -#define xchg(ptr, x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + BUILD_BUG_ON(sizeof(*(ptr)) & ~0xc); \ + \ + ((__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))); \ +}) extern void set_handler(unsigned long offset, void *addr, unsigned long len); extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len); -- cgit v0.10.2 From e1eb3a983befdb422e1aae299bdab573d04929f6 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 26 Nov 2009 18:28:42 +0000 Subject: MIPS: Add missing definition for MADV_HWPOISON. Thanks to Joseph S. Myers for reporting this. Signed-off-by: Ralf Baechle Cc: "Joseph S. Myers" Patchwork: http://patchwork.linux-mips.org/patch/723/ diff --git a/arch/mips/include/asm/mman.h b/arch/mips/include/asm/mman.h index a2250f3..c892bfb 100644 --- a/arch/mips/include/asm/mman.h +++ b/arch/mips/include/asm/mman.h @@ -75,6 +75,7 @@ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ +#define MADV_HWPOISON 100 /* poison a page for testing */ /* compatibility flags */ #define MAP_FILE 0 -- cgit v0.10.2 From 315fe625f878749a7d2b6b65a40c29bbbe6e1dc7 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Tue, 1 Dec 2009 14:55:25 +0800 Subject: MIPS: Loongson: Disallow 4kB pages Currently, with PAGE_SIZE_4KB, the kernel for loongson will hang on: Kernel panic - not syncing: Attempted to kill init! The possible reason is the cache aliases problem: Loongson 2F has 64kb, 4 way L1 Cache, the way size is 16kb, which is bigger then 4kb. so, If using 4kb page size, there is cache aliases problem. To avoid this kind of problem, extra cache flushing. The 2nd possible solution is 16kb page size which avoids cache aliases without the need for extra cache flushes. So we disable 4kB pages until the aliasing issue is solved. Signed-off-by: Wu Zhangjin Patchwork: http://patchwork.linux-mips.org/patch/736/ Cc: linux-mips@linux-mips.org Cc: zhangfx@lemote.com Signed-off-by: Ralf Baechle diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ffdd651..e232e50 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1453,6 +1453,7 @@ choice config PAGE_SIZE_4KB bool "4kB" + depends on !CPU_LOONGSON2 help This option select the standard 4kB Linux page size. On some R3000-family processors this is the only available page size. Using -- cgit v0.10.2 From f133f22dd6f413bdf71ebf7e00ce441d98ac7c87 Mon Sep 17 00:00:00 2001 From: Wu Zhangjin Date: Tue, 1 Dec 2009 14:55:42 +0800 Subject: MIPS: Loongson: Switch from flatmem to sparsemem With flatmem hibernation for Loongson will fail, and there are also some other problems such as broken files when using NFS or CIFS / Samba. The config help of sparsemem says: "This option provides some potential performance benefits, along with decreased code complexity." So to avoid the potential problems of FLATMEM, we disable FLATMEM directly and use SPARSEMEM instead. Related email thread: http://groups.google.com/group/loongson-dev/browse_thread/thread/b6b65890ec2b0f24/feb43e5aa7f55d9b?show_docid=feb43e5aa7f55d9b Reported-by: Tatu Kilappa Signed-off-by: Wu Zhangjin Patchwork: http://patchwork.linux-mips.org/patch/737/ Cc: linux-mips@linux-mips.org Cc: zhangfx@lemote.com Signed-off-by: Ralf Baechle diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index e232e50..fd7620f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1778,7 +1778,7 @@ config SYS_SUPPORTS_SMARTMIPS config ARCH_FLATMEM_ENABLE def_bool y - depends on !NUMA + depends on !NUMA && !CPU_LOONGSON2 config ARCH_DISCONTIGMEM_ENABLE bool -- cgit v0.10.2 From bf56a4ea9f1683c5b223fd3a5dbea23f1fa91c34 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:20 +0800 Subject: trace_syscalls: Remove unused event_syscall_enter and event_syscall_exit fix event_enter_##sname->event fix event_exit_##sname->event remove unused event_syscall_enter and event_syscall_exit Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D278.4090209@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b50974a..2f7c539 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -178,7 +178,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_enter_##sname = { \ .name = "sys_enter"#sname, \ .system = "syscalls", \ - .event = &event_syscall_enter, \ + .event = &enter_syscall_print_##sname, \ .raw_init = init_enter_##sname, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ @@ -214,7 +214,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ event_exit_##sname = { \ .name = "sys_exit"#sname, \ .system = "syscalls", \ - .event = &event_syscall_exit, \ + .event = &exit_syscall_print_##sname, \ .raw_init = init_exit_##sname, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 51ee17d..5f8827c 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -37,8 +37,6 @@ extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); -extern struct trace_event event_syscall_enter; -extern struct trace_event event_syscall_exit; extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 63aa807..00d6e17 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -444,14 +444,6 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } -struct trace_event event_syscall_enter = { - .trace = print_syscall_enter, -}; - -struct trace_event event_syscall_exit = { - .trace = print_syscall_exit, -}; - int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit v0.10.2 From 31c16b13349970b2684248c7d8608d2a96ae135d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:30 +0800 Subject: trace_syscalls: Set event_enter_##sname->data to its metadata Set event_enter_##sname->data to its metadata, it makes codes simpler. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D282.7050709@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2f7c539..d3c9fd0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -153,6 +153,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) #define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ @@ -184,11 +185,12 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ @@ -220,7 +222,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ - .data = "sys"#sname, \ + .data = (void *)&__syscall_meta_##sname,\ TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5f8827c..c5265c8 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,7 +34,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(char *name); +extern int syscall_name_to_nr(const char *name); void set_syscall_enter_id(int num, int id); void set_syscall_exit_id(int num, int id); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 00d6e17..39649b1 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -int syscall_name_to_nr(char *name) +int syscall_name_to_nr(const char *name) { int i; @@ -172,18 +172,11 @@ extern char *__bad_type_size(void); int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { int i; - int nr; int ret; - struct syscall_metadata *entry; + struct syscall_metadata *entry = call->data; struct syscall_trace_enter trace; int offset = offsetof(struct syscall_trace_enter, args); - nr = syscall_name_to_nr(call->data); - entry = syscall_nr_to_meta(nr); - - if (!entry) - return 0; - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" "\tsigned:%u;\n", SYSCALL_FIELD(int, nr)); @@ -245,18 +238,11 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; - struct syscall_metadata *meta; + struct syscall_metadata *meta = call->data; int ret; - int nr; int i; int offset = offsetof(typeof(trace), args); - nr = syscall_name_to_nr(call->data); - meta = syscall_nr_to_meta(nr); - - if (!meta) - return 0; - ret = trace_define_common_fields(call); if (ret) return ret; @@ -366,9 +352,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; + const char *name; - name = (char *)call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -389,9 +375,9 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - char *name; + const char *name; - name = (char *)call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; @@ -407,9 +393,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - char *name; + const char *name; - name = call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return -ENOSYS; @@ -430,9 +416,9 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - char *name; + const char *name; - name = call->data; + name = ((struct syscall_metadata *)call->data)->name; num = syscall_name_to_nr(name); if (num < 0 || num >= NR_syscalls) return; -- cgit v0.10.2 From fcc19438dda38dacc8c144e2db3ebc6b9fd4f8b8 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:36 +0800 Subject: trace_syscalls: Remove enter_id exit_id use ->enter_event->id instead of ->enter_id use ->exit_event->id instead of ->exit_id Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D288.7030001@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d3c9fd0..b9af875 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -168,7 +168,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_enter_##sname.id = id; \ - set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ return 0; \ } \ @@ -205,7 +204,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ if (!id) \ return -ENODEV; \ event_exit_##sname.id = id; \ - set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ return 0; \ } \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index c5265c8..ca09561 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -15,8 +15,6 @@ * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) - * @enter_id: associated ftrace enter event id - * @exit_id: associated ftrace exit event id * @enter_event: associated syscall_enter trace event * @exit_event: associated syscall_exit trace event */ @@ -25,8 +23,6 @@ struct syscall_metadata { int nb_args; const char **types; const char **args; - int enter_id; - int exit_id; struct ftrace_event_call *enter_event; struct ftrace_event_call *exit_event; @@ -35,8 +31,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); extern int syscall_name_to_nr(const char *name); -void set_syscall_enter_id(int num, int id); -void set_syscall_exit_id(int num, int id); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 39649b1..27eb18d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -67,16 +67,6 @@ int syscall_name_to_nr(const char *name) return -1; } -void set_syscall_enter_id(int num, int id) -{ - syscalls_metadata[num]->enter_id = id; -} - -void set_syscall_exit_id(int num, int id) -{ - syscalls_metadata[num]->exit_id = id; -} - enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { @@ -93,7 +83,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags) if (!entry) goto end; - if (entry->enter_id != ent->type) { + if (entry->enter_event->id != ent->type) { WARN_ON_ONCE(1); goto end; } @@ -148,7 +138,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } - if (entry->exit_id != ent->type) { + if (entry->exit_event->id != ent->type) { WARN_ON_ONCE(1); return TRACE_TYPE_UNHANDLED; } @@ -302,8 +292,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->enter_id, - size, 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->enter_event->id, size, 0, 0); if (!event) return; @@ -334,8 +324,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = trace_current_buffer_lock_reserve(&buffer, sys_data->exit_id, - sizeof(*entry), 0, 0); + event = trace_current_buffer_lock_reserve(&buffer, + sys_data->exit_event->id, sizeof(*entry), 0, 0); if (!event) return; @@ -510,11 +500,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) rec = (struct syscall_trace_enter *) raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->enter_id; + rec->ent.type = sys_data->enter_event->id; rec->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - perf_tp_event(sys_data->enter_id, 0, 1, rec, size); + perf_tp_event(sys_data->enter_event->id, 0, 1, rec, size); end: perf_swevent_put_recursion_context(rctx); @@ -615,11 +605,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) rec = (struct syscall_trace_exit *)raw_data; tracing_generic_entry_update(&rec->ent, 0, 0); - rec->ent.type = sys_data->exit_id; + rec->ent.type = sys_data->exit_event->id; rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - perf_tp_event(sys_data->exit_id, 0, 1, rec, size); + perf_tp_event(sys_data->exit_event->id, 0, 1, rec, size); end: perf_swevent_put_recursion_context(rctx); -- cgit v0.10.2 From c252f65793874b56d50395ab604db465ce688665 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:47 +0800 Subject: trace_syscalls: Add syscall_nr field to struct syscall_metadata Add syscall_nr field to struct syscall_metadata, it helps us to get syscall number easier. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D293.6090800@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b9af875..3c280d7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -161,7 +161,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_enter_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&enter_syscall_print_##sname);\ @@ -197,7 +197,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ static int init_exit_##sname(struct ftrace_event_call *call) \ { \ int num, id; \ - num = syscall_name_to_nr("sys"#sname); \ + num = __syscall_meta_##sname.syscall_nr; \ if (num < 0) \ return -ENOSYS; \ id = register_ftrace_event(&exit_syscall_print_##sname);\ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index ca09561..1531eef 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -12,6 +12,7 @@ * A syscall entry in the ftrace syscalls array. * * @name: name of the syscall + * @syscall_nr: number of the syscall * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) @@ -20,6 +21,7 @@ */ struct syscall_metadata { const char *name; + int syscall_nr; int nb_args; const char **types; const char **args; @@ -30,7 +32,6 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); -extern int syscall_name_to_nr(const char *name); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 27eb18d..144cc14 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,7 +51,7 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -int syscall_name_to_nr(const char *name) +static int syscall_name_to_nr(const char *name) { int i; @@ -342,10 +342,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) { int ret = 0; int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -365,10 +363,8 @@ int reg_event_syscall_enter(struct ftrace_event_call *call) void unreg_event_syscall_enter(struct ftrace_event_call *call) { int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -383,10 +379,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) { int ret = 0; int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return -ENOSYS; mutex_lock(&syscall_trace_lock); @@ -406,10 +400,8 @@ int reg_event_syscall_exit(struct ftrace_event_call *call) void unreg_event_syscall_exit(struct ftrace_event_call *call) { int num; - const char *name; - name = ((struct syscall_metadata *)call->data)->name; - num = syscall_name_to_nr(name); + num = ((struct syscall_metadata *)call->data)->syscall_nr; if (num < 0 || num >= NR_syscalls) return; mutex_lock(&syscall_trace_lock); @@ -436,6 +428,10 @@ int __init init_ftrace_syscalls(void) for (i = 0; i < NR_syscalls; i++) { addr = arch_syscall_addr(i); meta = find_syscall_meta(addr); + if (!meta) + continue; + + meta->syscall_nr = i; syscalls_metadata[i] = meta; } -- cgit v0.10.2 From a1301da0997bf73c44dbe584e9070a13adc89672 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:23:55 +0800 Subject: trace_syscalls: Remove duplicate init_enter_##sname() use only one init_syscall_trace instead of many init_enter_##sname()/init_exit_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D29B.6090708@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3c280d7..cf0d923 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -158,19 +158,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - static int init_enter_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&enter_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_enter_##sname.id = id; \ - INIT_LIST_HEAD(&event_enter_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -179,7 +166,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = init_enter_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -194,19 +181,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - static int init_exit_##sname(struct ftrace_event_call *call) \ - { \ - int num, id; \ - num = __syscall_meta_##sname.syscall_nr; \ - if (num < 0) \ - return -ENOSYS; \ - id = register_ftrace_event(&exit_syscall_print_##sname);\ - if (!id) \ - return -ENODEV; \ - event_exit_##sname.id = id; \ - INIT_LIST_HEAD(&event_exit_##sname.fields); \ - return 0; \ - } \ TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ @@ -215,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = init_exit_##sname, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 1531eef..dff9371 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -32,6 +32,7 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern unsigned long arch_syscall_addr(int nr); +extern int init_syscall_trace(struct ftrace_event_call *call); extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 144cc14..c651409 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -412,6 +412,18 @@ void unreg_event_syscall_exit(struct ftrace_event_call *call) mutex_unlock(&syscall_trace_lock); } +int init_syscall_trace(struct ftrace_event_call *call) +{ + int id; + + id = register_ftrace_event(call->event); + if (!id) + return -ENODEV; + call->id = id; + INIT_LIST_HEAD(&call->fields); + return 0; +} + int __init init_ftrace_syscalls(void) { struct syscall_metadata *meta; -- cgit v0.10.2 From 3bbe84e9d385205d638035ee9dcc4db1b486ea08 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:24:01 +0800 Subject: trace_syscalls: Simplify syscall profile use only one prof_sysenter_enable() instead of prof_sysenter_enable_##sname() use only one prof_sysenter_disable() instead of prof_sysenter_disable_##sname() use only one prof_sysexit_enable() instead of prof_sysexit_enable_##sname() use only one prof_sysexit_disable() instead of prof_sysexit_disable_##sname() Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D2A1.8060304@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index cf0d923..c2df3a5 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -99,37 +99,16 @@ struct perf_event_attr; #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) #ifdef CONFIG_EVENT_PROFILE -#define TRACE_SYS_ENTER_PROFILE(sname) \ -static int prof_sysenter_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_enter("sys"#sname); \ -} \ - \ -static void prof_sysenter_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_enter("sys"#sname); \ -} - -#define TRACE_SYS_EXIT_PROFILE(sname) \ -static int prof_sysexit_enable_##sname(struct ftrace_event_call *unused) \ -{ \ - return reg_prof_syscall_exit("sys"#sname); \ -} \ - \ -static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ -{ \ - unreg_prof_syscall_exit("sys"#sname); \ -} #define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysenter_enable_##sname, \ - .profile_disable = prof_sysenter_disable_##sname, + .profile_enable = prof_sysenter_enable, \ + .profile_disable = prof_sysenter_disable, #define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ .profile_count = ATOMIC_INIT(-1), \ - .profile_enable = prof_sysexit_enable_##sname, \ - .profile_disable = prof_sysexit_disable_##sname, + .profile_enable = prof_sysexit_enable, \ + .profile_disable = prof_sysexit_disable, #else #define TRACE_SYS_ENTER_PROFILE(sname) #define TRACE_SYS_ENTER_PROFILE_INIT(sname) @@ -158,7 +137,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ - TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -181,7 +159,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *unused) \ struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ - TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index dff9371..961fda3 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -50,10 +50,10 @@ enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #ifdef CONFIG_EVENT_PROFILE -int reg_prof_syscall_enter(char *name); -void unreg_prof_syscall_enter(char *name); -int reg_prof_syscall_exit(char *name); -void unreg_prof_syscall_exit(char *name); +int prof_sysenter_enable(struct ftrace_event_call *call); +void prof_sysenter_disable(struct ftrace_event_call *call); +int prof_sysexit_enable(struct ftrace_event_call *call); +void prof_sysexit_disable(struct ftrace_event_call *call); #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c651409..1e85b6c 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -520,14 +520,12 @@ end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_enter(char *name) +int prof_sysenter_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_enter) @@ -543,13 +541,11 @@ int reg_prof_syscall_enter(char *name) return ret; } -void unreg_prof_syscall_enter(char *name) +void prof_sysenter_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_enter--; @@ -625,14 +621,12 @@ end_recursion: local_irq_restore(flags); } -int reg_prof_syscall_exit(char *name) +int prof_sysexit_enable(struct ftrace_event_call *call) { int ret = 0; int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return -ENOSYS; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_exit) @@ -648,13 +642,11 @@ int reg_prof_syscall_exit(char *name) return ret; } -void unreg_prof_syscall_exit(char *name) +void prof_sysexit_disable(struct ftrace_event_call *call) { int num; - num = syscall_name_to_nr(name); - if (num < 0 || num >= NR_syscalls) - return; + num = ((struct syscall_metadata *)call->data)->syscall_nr; mutex_lock(&syscall_trace_lock); sys_prof_refcount_exit--; -- cgit v0.10.2 From 7be077f56370cd52c48c08272b0867132f87bc48 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 1 Dec 2009 16:24:06 +0800 Subject: trace_syscalls: Remove unused syscall_name_to_nr() After duplications are removed, syscall_name_to_nr() is unused. Signed-off-by: Lai Jiangshan Acked-by: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4B14D2A6.6060803@cn.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 1e85b6c..57501d9 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -51,22 +51,6 @@ static struct syscall_metadata *syscall_nr_to_meta(int nr) return syscalls_metadata[nr]; } -static int syscall_name_to_nr(const char *name) -{ - int i; - - if (!syscalls_metadata) - return -1; - - for (i = 0; i < NR_syscalls; i++) { - if (syscalls_metadata[i]) { - if (!strcmp(syscalls_metadata[i]->name, name)) - return i; - } - } - return -1; -} - enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags) { -- cgit v0.10.2 From 4c4cb1b1605e50983afd7924980c9434b214599f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 1 Dec 2009 13:17:41 -0800 Subject: drivers/media/dvb/dvb-core/dvb_frontend.c: needs semaphore.h Fixes: v4l/dvb_frontend.c: In function 'dvb_frontend_stop': v4l/dvb_frontend.c:707: error: implicit declaration of function 'init_MUTEX' Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14609 Reported-by: Cc: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c index ddf639e..9808241 100644 --- a/drivers/media/dvb/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb/dvb-core/dvb_frontend.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From fa00e106eb6f082654d822a0946c0c86297ede2c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 1 Dec 2009 13:17:41 -0800 Subject: drivers/rtc/rtc-pcf50633.c: fix use after free in pcf50633_rtc_probe() "rtc" is freed and then dereferenced on the next line. This patch fixes that. Signed-off-by: Dan Carpenter Acked-by: Alessandro Zummo Cc: David Brownell Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-pcf50633.c b/drivers/rtc/rtc-pcf50633.c index 33a10c4..4c5d5d0 100644 --- a/drivers/rtc/rtc-pcf50633.c +++ b/drivers/rtc/rtc-pcf50633.c @@ -292,8 +292,9 @@ static int __devinit pcf50633_rtc_probe(struct platform_device *pdev) &pcf50633_rtc_ops, THIS_MODULE); if (IS_ERR(rtc->rtc_dev)) { + int ret = PTR_ERR(rtc->rtc_dev); kfree(rtc); - return PTR_ERR(rtc->rtc_dev); + return ret; } pcf50633_register_irq(rtc->pcf, PCF50633_IRQ_ALARM, -- cgit v0.10.2 From 3510b8f7f53bf0ded13888724734bba87d22ecc3 Mon Sep 17 00:00:00 2001 From: Sudhakar Rajashekhara Date: Tue, 1 Dec 2009 13:17:43 -0800 Subject: davinci: fb: fix frame buffer driver issues Following issues have been addressed on DA8XX/OMAP-L1XX: a. Screen misalignment during booting when frame buffer console is enabled. b. Driver was configured always in PSEUDOCOLOR mode. This patch dynamically configures the driver either in PSEUDOCOLOUR or TRUECOLOR mode depending on bpp. c. The RED and BLUE offsets were interchanged resulting in wrong bootup logo colour. This patch has been tested on DA830/OMAP-L137 and DA850/OMAP-L138 EVMs. Signed-off-by: Sudhakar Rajashekhara Cc: Steve Chen Cc: Pavel Kiryukhin Cc: Krzysztof Helt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/video/da8xx-fb.c b/drivers/video/da8xx-fb.c index 035d568..ea1fd3f 100644 --- a/drivers/video/da8xx-fb.c +++ b/drivers/video/da8xx-fb.c @@ -554,11 +554,11 @@ static int fb_check_var(struct fb_var_screeninfo *var, var->transp.length = 0; break; case 16: /* RGB 565 */ - var->red.offset = 0; + var->red.offset = 11; var->red.length = 5; var->green.offset = 5; var->green.length = 6; - var->blue.offset = 11; + var->blue.offset = 0; var->blue.length = 5; var->transp.offset = 0; var->transp.length = 0; @@ -591,7 +591,7 @@ static int __devexit fb_remove(struct platform_device *dev) unregister_framebuffer(info); fb_dealloc_cmap(&info->cmap); dma_free_coherent(NULL, par->databuf_sz + PAGE_SIZE, - info->screen_base, + info->screen_base - PAGE_SIZE, info->fix.smem_start); free_irq(par->irq, par); clk_disable(par->lcdc_clk); @@ -749,6 +749,7 @@ static int __init fb_probe(struct platform_device *device) (PAGE_SIZE - par->palette_sz); /* the rest of the frame buffer is pixel data */ + da8xx_fb_info->screen_base = par->v_palette_base + par->palette_sz; da8xx_fb_fix.smem_start = par->p_palette_base + par->palette_sz; da8xx_fb_fix.smem_len = par->databuf_sz - par->palette_sz; da8xx_fb_fix.line_length = (lcdc_info->width * lcd_cfg->bpp) / 8; @@ -787,6 +788,8 @@ static int __init fb_probe(struct platform_device *device) da8xx_fb_info->var = da8xx_fb_var; da8xx_fb_info->fbops = &da8xx_fb_ops; da8xx_fb_info->pseudo_palette = par->pseudo_palette; + da8xx_fb_info->fix.visual = (da8xx_fb_info->var.bits_per_pixel <= 8) ? + FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; ret = fb_alloc_cmap(&da8xx_fb_info->cmap, PALETTE_SIZE, 0); if (ret) @@ -825,7 +828,7 @@ err_free_irq: err_release_fb_mem: dma_free_coherent(NULL, par->databuf_sz + PAGE_SIZE, - da8xx_fb_info->screen_base, + da8xx_fb_info->screen_base - PAGE_SIZE, da8xx_fb_info->fix.smem_start); err_release_fb: -- cgit v0.10.2 From 974348435c6d7b65fd6d8857b4809b0ff208d323 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Tue, 1 Dec 2009 13:17:44 -0800 Subject: kbuild: stepping down as maintainer It has been fun but the last year or more it has been a duty and a burden. So I leave it open for others to take over. Signed-off-by: Sam Ravnborg Cc: Anibal Monsalve Salazar Cc: Steven Rostedt Cc: Michal Marek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/MAINTAINERS b/MAINTAINERS index 16129e6..4f96ac8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3015,11 +3015,8 @@ S: Maintained F: fs/autofs4/ KERNEL BUILD -M: Sam Ravnborg -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild-next.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/sam/kbuild-fixes.git L: linux-kbuild@vger.kernel.org -S: Maintained +S: Orphan F: Documentation/kbuild/ F: Makefile F: scripts/Makefile.* -- cgit v0.10.2 From ca0297015d4f117005718e01aa368875abcccbc5 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Tue, 1 Dec 2009 13:17:45 -0800 Subject: gpio: Langwell GPIO driver bugfixes - Remove wrong and unnecessary unmask operation - Remove extra GEDR reading This fixes the loss of interrupts which occurs when two or more pins are triggered in close succession. Signed-off-by: Alek Du Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/gpio/langwell_gpio.c b/drivers/gpio/langwell_gpio.c index 5711ce5..4baf3d7 100644 --- a/drivers/gpio/langwell_gpio.c +++ b/drivers/gpio/langwell_gpio.c @@ -144,13 +144,6 @@ static int lnw_irq_type(unsigned irq, unsigned type) static void lnw_irq_unmask(unsigned irq) { - struct lnw_gpio *lnw = get_irq_chip_data(irq); - u32 gpio = irq - lnw->irq_base; - u8 reg = gpio / 32; - void __iomem *gedr; - - gedr = (void __iomem *)(&lnw->reg_base->GEDR[reg]); - writel(BIT(gpio % 32), gedr); }; static void lnw_irq_mask(unsigned irq) @@ -183,13 +176,11 @@ static void lnw_irq_handler(unsigned irq, struct irq_desc *desc) gedr_v = readl(gedr); if (!gedr_v) continue; - for (gpio = reg*32; gpio < reg*32+32; gpio++) { - gedr_v = readl(gedr); + for (gpio = reg*32; gpio < reg*32+32; gpio++) if (gedr_v & BIT(gpio % 32)) { pr_debug("pin %d triggered\n", gpio); generic_handle_irq(lnw->irq_base + gpio); } - } /* clear the edge detect status bit */ writel(gedr_v, gedr); } -- cgit v0.10.2 From 0a1f127a0594d62cb23c26732686d0e2b097b264 Mon Sep 17 00:00:00 2001 From: Peter Horton Date: Tue, 1 Dec 2009 13:17:46 -0800 Subject: aoe: prevent cache aliases Prevent the AoE block driver from creating cache aliases of page cache pages on machines with virtually indexed caches. Building kernels on an AT91SAM9G20 board without this patch fails with segmentation faults after a couple of passes. Signed-off-by: Peter Horton Cc: "Ed L. Cashin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 965ece2..13bb69d 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -735,6 +735,21 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector part_stat_unlock(); } +/* + * Ensure we don't create aliases in VI caches + */ +static inline void +killalias(struct bio *bio) +{ + struct bio_vec *bv; + int i; + + if (bio_data_dir(bio) == READ) + __bio_for_each_segment(bv, bio, i, 0) { + flush_dcache_page(bv->bv_page); + } +} + void aoecmd_ata_rsp(struct sk_buff *skb) { @@ -853,8 +868,12 @@ aoecmd_ata_rsp(struct sk_buff *skb) if (buf && --buf->nframesout == 0 && buf->resid == 0) { diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector); - n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; - bio_endio(buf->bio, n); + if (buf->flags & BUFFL_FAIL) + bio_endio(buf->bio, -EIO); + else { + killalias(buf->bio); + bio_endio(buf->bio, 0); + } mempool_free(buf, d->bufpool); } -- cgit v0.10.2 From 48a7f7746875425797aea31ed2910088635c1c7a Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 1 Dec 2009 13:17:48 -0800 Subject: rtc-x1205: fix rtc_time to y2k register value conversion The possible CCR_Y2K register values are 19 or 20 and struct rtc_time's tm_year is in years since 1900. The function translating rtc_time to register values assumes tm_year to be years since first christmas, though, and we end up storing 0 or 1 in the CCR_Y2K register, which the hardware does not refuse to do. A subsequent probing of the clock fails due to the invalid value range in the register, though. [ And if it didn't, reading the clock would yield a bogus year because the function translating registers to tm_year is assuming a register value of 19 or 20. ] This fixes the conversion from years since 1900 in tm_year to the corresponding CCR_Y2K value of 19 or 20. Signed-off-by: Johannes Weiner Cc: Alessandro Zummo Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index 310c107..cc9ba47 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -195,7 +195,7 @@ static int x1205_set_datetime(struct i2c_client *client, struct rtc_time *tm, /* year, since the rtc epoch*/ buf[CCR_YEAR] = bin2bcd(tm->tm_year % 100); buf[CCR_WDAY] = tm->tm_wday & 0x07; - buf[CCR_Y2K] = bin2bcd(tm->tm_year / 100); + buf[CCR_Y2K] = bin2bcd((tm->tm_year + 1900) / 100); } /* If writing alarm registers, set compare bits on registers 0-4 */ -- cgit v0.10.2 From cb8799eeddd542abd504c62c11014dfbaec7f3a1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 1 Dec 2009 13:17:49 -0800 Subject: rtc-x1205: reset clock to sane state after power failure When detecting power failure, the probe function would reset the clock time to defined state. However, the clock's _date_ might still be bogus and a subsequent probe fails when sanity-checking these values. Change the power-failure fixup code to do a full setting of rtc_time, including a valid date. Signed-off-by: Johannes Weiner Cc: Alessandro Zummo Cc: Paul Gortmaker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c index cc9ba47..6583c1a 100644 --- a/drivers/rtc/rtc-x1205.c +++ b/drivers/rtc/rtc-x1205.c @@ -280,9 +280,9 @@ static int x1205_fix_osc(struct i2c_client *client) int err; struct rtc_time tm; - tm.tm_hour = tm.tm_min = tm.tm_sec = 0; + memset(&tm, 0, sizeof(tm)); - err = x1205_set_datetime(client, &tm, 0, X1205_CCR_BASE, 0); + err = x1205_set_datetime(client, &tm, 1, X1205_CCR_BASE, 0); if (err < 0) dev_err(&client->dev, "unable to restart the oscillator\n"); -- cgit v0.10.2 From e9438e3193a0d1430b70fafe46855c1a56202f7c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 1 Dec 2009 13:17:50 -0800 Subject: sysfs: fix SYSFS_DEPRECATED_V2 prompt The SYSFS_DEPRECATED_V2 says "remove" older, deprecated features, but it actually enables them, so correct this confusing, backwards text. Signed-off-by: Randy Dunlap Cc: Kay Sievers Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds diff --git a/init/Kconfig b/init/Kconfig index 39923cc..eb4b337 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -606,7 +606,7 @@ config SYSFS_DEPRECATED bool config SYSFS_DEPRECATED_V2 - bool "remove sysfs features which may confuse old userspace tools" + bool "enable deprecated sysfs features which may confuse old userspace tools" depends on SYSFS default n select SYSFS_DEPRECATED -- cgit v0.10.2 From c19e33aa840e9202ef8d4c93056b59f3edc2208d Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Wed, 2 Dec 2009 14:11:46 +0800 Subject: perf tools: Fix _GNU_SOURCE macro related strndup() build error strndup is a GNU extension. So dont include string.h without defining _GNU_SOURCE (it results in a compile error otherwise). Remove these includes as util.h does it already. Signed-off-by: Liming Wang Acked-by: Frederic Weisbecker Acked-by: Xiao Guangrong Cc: peterz@infradead.org Cc: mhiramat@redhat.com LKML-Reference: <1259734306-26323-1-git-send-email-liming.wang@windriver.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 0977cf4..f24a8cc 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -1,5 +1,3 @@ -#include -#include #include "string.h" #include "util.h" -- cgit v0.10.2 From bdad0db7dbdb37d0bb3c7d0f65cd3ff599ea6ecb Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 2 Dec 2009 16:08:41 +0800 Subject: perf_event: Fix compile error Fix: cc1: warnings being treated as errors builtin-probe.c: In function 'cmd_probe': builtin-probe.c:163: error: unused variable 'fd' Signed-off-by: Xiao Guangrong Cc: Masami Hiramatsu Cc: Peter Zijlstra LKML-Reference: <4B162089.8000907@cn.fujitsu.com> [ v2: use NO_LIBDWARF instead of __used ] Signed-off-by: Ingo Molnar diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index b5d15cf..a58e11b 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -160,7 +160,10 @@ static const struct option options[] = { int cmd_probe(int argc, const char **argv, const char *prefix __used) { - int i, j, fd, ret; + int i, j, ret; +#ifndef NO_LIBDWARF + int fd; +#endif struct probe_point *pp; argc = parse_options(argc, argv, options, probe_usage, -- cgit v0.10.2 From ec70ccd806111ba3caf596def91a8580138b12db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Tue, 1 Dec 2009 15:05:01 -0500 Subject: perf: Don't free perf_mmap_data until work has been done MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the CONFIG_PERF_USE_VMALLOC case, perf_mmap_data_free() only schedules the cleanup of the perf_mmap_data struct. In that case we have to wait until the work has been done before we free data. Signed-off-by: Kristian Høgsberg Cc: David S. Miller Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker Cc: LKML-Reference: <1259697901-1747-1-git-send-email-krh@bitplanet.net> Signed-off-by: Ingo Molnar diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 040ee51..6b7ddba 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2210,6 +2210,7 @@ static void perf_mmap_data_free(struct perf_mmap_data *data) perf_mmap_free_page((unsigned long)data->user_page); for (i = 0; i < data->nr_pages; i++) perf_mmap_free_page((unsigned long)data->data_pages[i]); + kfree(data); } #else @@ -2250,6 +2251,7 @@ static void perf_mmap_data_free_work(struct work_struct *work) perf_mmap_unmark_page(base + (i * PAGE_SIZE)); vfree(base); + kfree(data); } static void perf_mmap_data_free(struct perf_mmap_data *data) @@ -2355,7 +2357,6 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head) data = container_of(rcu_head, struct perf_mmap_data, rcu_head); perf_mmap_data_free(data); - kfree(data); } static void perf_mmap_data_release(struct perf_event *event) -- cgit v0.10.2 From 8592e6486a177a02f048567cb928bc3a1f9a86c3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Dec 2009 12:56:46 +0900 Subject: sched: Revert 498657a478c60be092208422fefa9c7b248729c2 498657a478c60be092208422fefa9c7b248729c2 incorrectly assumed that preempt wasn't disabled around context_switch() and thus was fixing imaginary problem. It also broke KVM because it depended on ->sched_in() to be called with irq enabled so that it can do smp calls from there. Revert the incorrect commit and add comment describing different contexts under with the two callbacks are invoked. Avi: spotted transposed in/out in the added comment. Signed-off-by: Tejun Heo Acked-by: Avi Kivity Cc: peterz@infradead.org Cc: efault@gmx.de Cc: rusty@rustcorp.com.au LKML-Reference: <1259726212-30259-2-git-send-email-tj@kernel.org> Signed-off-by: Ingo Molnar diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 72b1a10..2e681d9 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -105,6 +105,11 @@ struct preempt_notifier; * @sched_out: we've just been preempted * notifier: struct preempt_notifier for the task being preempted * next: the task that's kicking us out + * + * Please note that sched_in and out are called under different + * contexts. sched_out is called with rq lock held and irq disabled + * while sched_in is called without rq lock and irq enabled. This + * difference is intentional and depended upon by its users. */ struct preempt_ops { void (*sched_in)(struct preempt_notifier *notifier, int cpu); diff --git a/kernel/sched.c b/kernel/sched.c index b3d4e2b..1031cae 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2768,9 +2768,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) prev_state = prev->state; finish_arch_switch(prev); perf_event_task_sched_in(current, cpu_of(rq)); - fire_sched_in_preempt_notifiers(current); finish_lock_switch(rq, prev); + fire_sched_in_preempt_notifiers(current); if (mm) mmdrop(mm); if (unlikely(prev_state == TASK_DEAD)) { -- cgit v0.10.2 From 3a9089fd78367e2c6c815129030b790a0f5c2715 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 1 Dec 2009 12:18:49 -0500 Subject: tracing: Add DEFINE_EVENT(), DEFINE_SINGLE_EVENT() support to docbook The introduction of the new 'DECLARE_EVENT_CLASS()' obviates the need for the 'TRACE_EVENT()' macro in some cases. Thus, docbook style comments that used to live with 'TRACE_EVENT()' are now moved to 'DEFINE_EVENT()'. Thus, we need to make the docbook system understand the new 'DEFINE_EVENT()' macro. In addition I've tried to futureproof the patch, by also adding support for 'DEFINE_SINGLE_EVENT()', since there has been discussion about renaming: TRACE_EVENT() -> DEFINE_SINGLE_EVENT(). Without this patch the tracepoint docbook fails to build. I've verified that this patch correctly builds the tracepoint docbook which currently covers signals, and irqs. Changes in v2: - properly indent perl 'if' statements Signed-off-by: Jason Baron Acked-by: Steven Rostedt Acked-by: Randy Dunlap Cc: William Cohen Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Masami Hiramatsu LKML-Reference: <200912011718.nB1HIn7t011371@int-mx04.intmail.prod.int.phx2.redhat.com> Signed-off-by: Ingo Molnar diff --git a/scripts/kernel-doc b/scripts/kernel-doc index ea9f8a5..241310e 100755 --- a/scripts/kernel-doc +++ b/scripts/kernel-doc @@ -1852,10 +1852,17 @@ sub tracepoint_munge($) { my $tracepointname = 0; my $tracepointargs = 0; - if($prototype =~ m/TRACE_EVENT\((.*?),/) { + if ($prototype =~ m/TRACE_EVENT\((.*?),/) { $tracepointname = $1; } - if($prototype =~ m/TP_PROTO\((.*?)\)/) { + if ($prototype =~ m/DEFINE_SINGLE_EVENT\((.*?),/) { + $tracepointname = $1; + } + if ($prototype =~ m/DEFINE_EVENT\((.*?),(.*?),/) { + $tracepointname = $2; + } + $tracepointname =~ s/^\s+//; #strip leading whitespace + if ($prototype =~ m/TP_PROTO\((.*?)\)/) { $tracepointargs = $1; } if (($tracepointname eq 0) || ($tracepointargs eq 0)) { @@ -1920,7 +1927,9 @@ sub process_state3_function($$) { if ($prototype =~ /SYSCALL_DEFINE/) { syscall_munge(); } - if ($prototype =~ /TRACE_EVENT/) { + if ($prototype =~ /TRACE_EVENT/ || $prototype =~ /DEFINE_EVENT/ || + $prototype =~ /DEFINE_SINGLE_EVENT/) + { tracepoint_munge($file); } dump_function($prototype, $file); -- cgit v0.10.2 From 6b62fe019e39edfd1dbe3f224ecd0a87d9365223 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2009 07:23:10 +0100 Subject: tracing/syscalls: Make syscall events print callbacks static enter_syscall_print_##sname and exit_syscall_print_##sname don't need to have a global scope. Make them static. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Jason Baron Cc: Lai Jiangshan LKML-Reference: <1259734990-9034-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index c2df3a5..e79e2f3 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -134,7 +134,7 @@ struct perf_event_attr; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_enter_##sname; \ - struct trace_event enter_syscall_print_##sname = { \ + static struct trace_event enter_syscall_print_##sname = { \ .trace = print_syscall_enter, \ }; \ static struct ftrace_event_call __used \ @@ -156,7 +156,7 @@ struct perf_event_attr; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static const struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call event_exit_##sname; \ - struct trace_event exit_syscall_print_##sname = { \ + static struct trace_event exit_syscall_print_##sname = { \ .trace = print_syscall_exit, \ }; \ static struct ftrace_event_call __used \ -- cgit v0.10.2 From 1cedae72904b85462082dbcfd5190309ba37f8bd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2009 07:32:16 +0100 Subject: hw-breakpoints: Keep track of user disabled breakpoints When we disable a breakpoint through dr7, we unregister it right away, making us lose track of its corresponding address register value. It means that the following sequence would be unsupported: - set address in dr0 - enable it through dr7 - disable it through dr7 - enable it through dr7 because we lost the address register value when we disabled the breakpoint. Don't unregister the disabled breakpoints but rather disable them. Reported-by: "K.Prasad" Signed-off-by: Frederic Weisbecker LKML-Reference: <1259735536-9236-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2941b32..04d182a 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -595,7 +595,7 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) static struct perf_event * ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, - struct task_struct *tsk) + struct task_struct *tsk, int disabled) { int err; int gen_len, gen_type; @@ -616,7 +616,7 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, attr = bp->attr; attr.bp_len = gen_len; attr.bp_type = gen_type; - attr.disabled = 0; + attr.disabled = disabled; return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); } @@ -655,13 +655,21 @@ restore: */ if (!second_pass) continue; + thread->ptrace_bps[i] = NULL; - unregister_hw_breakpoint(bp); + bp = ptrace_modify_breakpoint(bp, len, type, + tsk, 1); + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + thread->ptrace_bps[i] = NULL; + break; + } + thread->ptrace_bps[i] = bp; } continue; } - bp = ptrace_modify_breakpoint(bp, len, type, tsk); + bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); /* Incorrect bp, or we have a bug in bp API */ if (IS_ERR(bp)) { -- cgit v0.10.2 From ca64c47cecd0321b2e0dcbd7aaff44b68ce20654 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 1 Dec 2009 15:31:15 -0800 Subject: x86, io-apic: Move the effort of clearing remoteIRR explicitly before migrating the irq When the level-triggered interrupt is seen as an edge interrupt, we try to clear the remoteIRR explicitly (using either an io-apic eoi register when present or through the idea of changing trigger mode of the io-apic RTE to edge and then back to level). But this explicit try also needs to happen before we try to migrate the irq. Otherwise irq migration attempt will fail anyhow, as it postpones the irq migration to a later attempt when it sees the remoteIRR in the io-apic RTE still set. Signed-off-by: "Maciej W. Rozycki" Reviewed-by: Suresh Siddha Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233334.975416130@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 085e60e..b377b97 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2583,6 +2583,20 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); + /* Tail end of version 0x11 I/O APIC bug workaround */ + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + + if (use_eoi_reg) + eoi_ioapic_irq(desc); + else { + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } + } + /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2616,20 +2630,6 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq_desc(desc); } - - /* Tail end of version 0x11 I/O APIC bug workaround */ - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - - if (use_eoi_reg) - eoi_ioapic_irq(desc); - else { - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } - } } #ifdef CONFIG_INTR_REMAP -- cgit v0.10.2 From c29d9db338db606c3335a03f337e1d4b7f6bb727 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 1 Dec 2009 15:31:16 -0800 Subject: x86, ioapic: Fix the EOI register detection mechanism Maciej W. Rozycki reported: > 82093AA I/O APIC has its version set to 0x11 and it > does not support the EOI register. Similarly I/O APICs > integrated into the 82379AB south bridge and the 82374EB/SB > EISA component. IO-APIC versions below 0x20 don't support EOI register. Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic version as 0x2. This is an error with documentation and these ICH chips use io-apic's of version 0x20 and indeed has a working EOI register for the io-apic. Fix the EOI register detection mechanism to check for version 0x20 and beyond. And also, a platform can potentially have io-apic's with different versions. Make the EOI register check per io-apic. Reported-by: Maciej W. Rozycki Signed-off-by: Suresh Siddha Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233335.065361533@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b377b97..78960a3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -539,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, add_pin_to_irq_node(cfg, node, newapic, newpin); } +static void __io_apic_modify_irq(struct irq_pin_list *entry, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + unsigned int reg, pin; + + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); +} + static void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { - int pin; struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); - } + for_each_irq_pin(entry, cfg->irq_2_pin) + __io_apic_modify_irq(entry, mask_and, mask_or, final); +} + +static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} + +static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) @@ -579,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED, NULL); -} - -static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER, NULL); -} - static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { struct irq_cfg *cfg = desc->chip_data; @@ -2492,17 +2498,42 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; -static int use_eoi_reg __read_mostly; - +/* + * IO-APIC versions below 0x20 don't support EOI register. + * For the record, here is the information about various versions: + * 0Xh 82489DX + * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant + * 2Xh I/O(x)APIC which is PCI 2.2 Compliant + * 30h-FFh Reserved + * + * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic + * version as 0x2. This is an error with documentation and these ICH chips + * use io-apic's of version 0x20. + * + * For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during this. +*/ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { struct irq_pin_list *entry; for_each_irq_pin(entry, cfg->irq_2_pin) { - if (irq_remapped(irq)) - io_apic_eoi(entry->apic, entry->pin); - else - io_apic_eoi(entry->apic, cfg->vector); + if (mp_ioapics[entry->apic].apicver >= 0x20) { + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } else { + __mask_and_edge_IO_APIC_irq(entry); + __unmask_and_level_IO_APIC_irq(entry); + } } } @@ -2520,23 +2551,6 @@ static void eoi_ioapic_irq(struct irq_desc *desc) spin_unlock_irqrestore(&ioapic_lock, flags); } -static int ioapic_supports_eoi(void) -{ - struct pci_dev *root; - - root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); - if (root && root->vendor == PCI_VENDOR_ID_INTEL && - mp_ioapics[0].apicver >= 0x2) { - use_eoi_reg = 1; - printk(KERN_INFO "IO-APIC supports EOI register\n"); - } else - printk(KERN_INFO "IO-APIC doesn't support EOI\n"); - - return 0; -} - -fs_initcall(ioapic_supports_eoi); - static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2587,14 +2601,7 @@ static void ack_apic_level(unsigned int irq) if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - if (use_eoi_reg) - eoi_ioapic_irq(desc); - else { - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } + eoi_ioapic_irq(desc); } /* Now we can move and renable the irq */ -- cgit v0.10.2 From 1c83995b6c7c6bb795bce80f75fbffb15f78db2d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 1 Dec 2009 15:31:17 -0800 Subject: x86, ioapic: Document another case when level irq is seen as an edge In the case when cpu goes offline, fixup_irqs() will forward any unhandled interrupt on the offlined cpu to the new cpu destination that is handling the corresponding interrupt. This interrupt forwarding is done via IPI's. Hence, in this case also level-triggered io-apic interrupt will be seen as an edge interrupt in the cpu's APIC IRR. Document this scenario in the code which handles this case by doing an explicit EOI to the io-apic to clear remote IRR of the io-apic RTE. Requested-by: Maciej W. Rozycki Signed-off-by: Suresh Siddha Cc: Maciej W. Rozycki Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233335.143970505@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 78960a3..c0b4468 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2586,6 +2586,19 @@ static void ack_apic_level(unsigned int irq) * level-triggered interrupt. We mask the source for the time of the * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro + * + * Also in the case when cpu goes offline, fixup_irqs() will forward + * any unhandled interrupt on the offlined cpu to the new cpu + * destination that is handling the corresponding interrupt. This + * interrupt forwarding is done via IPI's. Hence, in this case also + * level-triggered io-apic interrupt will be seen as an edge + * interrupt in the IRR. And we can't rely on the cpu's EOI + * to be broadcasted to the IO-APIC's which will clear the remoteIRR + * corresponding to the level-triggered interrupt. Hence on IO-APIC's + * supporting EOI register, we do an explicit EOI to clear the + * remote IRR and on IO-APIC's which don't have an EOI register, + * we use the above logic (mask+edge followed by unmask+level) from + * Manfred Spraul to clear the remote IRR. */ cfg = desc->chip_data; i = cfg->vector; @@ -2597,7 +2610,13 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); - /* Tail end of version 0x11 I/O APIC bug workaround */ + /* + * Tail end of clearing remote IRR bit (either by delivering the EOI + * message via io-apic EOI register write or simulating it using + * mask+edge followed by unnask+level logic) manually when the + * level triggered interrupt is seen as the edge triggered interrupt + * at the cpu. + */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); -- cgit v0.10.2 From 6d20792e85187b27ae3d1b76678a2dd7025e8bc2 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 1 Dec 2009 15:31:18 -0800 Subject: x86: Remove unnecessary mdelay() from cpu_disable_common() fixup_irqs() already has a mdelay(). Remove the extra and unnecessary mdelay() from cpu_disable_common(). Signed-off-by: Suresh Siddha Cc: Maciej W. Rozycki Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233335.232177348@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc6..324f2a4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1250,16 +1250,7 @@ static void __ref remove_cpu_from_maps(int cpu) void cpu_disable_common(void) { int cpu = smp_processor_id(); - /* - * HACK: - * Allow any queued timer interrupts to get serviced - * This is only a temporary solution until we cleanup - * fixup_irqs as we do for IA64. - */ - local_irq_enable(); - mdelay(1); - local_irq_disable(); remove_siblinginfo(cpu); /* It's now safe to remove this processor from the online map */ -- cgit v0.10.2 From 93aaa45a6ad3f983180223601fc663cc551ad499 Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Wed, 2 Dec 2009 16:42:54 +0800 Subject: perf tools: Replace %m with %a in sscanf Not all glibc support %m and it results in a compile error if %m not supported. Replace it with %a and (float *) casts. Signed-off-by: Liming Wang Acked-by: Frederic Weisbecker Cc: peterz@infradead.org Cc: mhiramat@redhat.com LKML-Reference: <1259743374-9950-1-git-send-email-liming.wang@windriver.com> Signed-off-by: Ingo Molnar diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e42f3ac..cd7fbda 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -199,8 +199,8 @@ void parse_trace_kprobe_event(const char *str, char **group, char **event, semantic_error("Too less arguments."); /* Scan event and group name. */ - ret = sscanf(argv[0], "%c:%m[^/ \t]/%m[^ \t]", - &pr, group, event); + ret = sscanf(argv[0], "%c:%a[^/ \t]/%a[^ \t]", + &pr, (float *)(void *)group, (float *)(void *)event); if (ret != 3) semantic_error("Failed to parse event name: %s", argv[0]); pr_debug("Group:%s Event:%s probe:%c\n", *group, *event, pr); @@ -211,7 +211,7 @@ void parse_trace_kprobe_event(const char *str, char **group, char **event, pp->retprobe = (pr == 'r'); /* Scan function name and offset */ - ret = sscanf(argv[1], "%m[^+]+%d", &pp->function, &pp->offset); + ret = sscanf(argv[1], "%a[^+]+%d", (float *)(void *)&pp->function, &pp->offset); if (ret == 1) pp->offset = 0; -- cgit v0.10.2 From e859cf8656043f158b4004ccc8cbbf1ba4f97177 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 30 Nov 2009 19:02:22 -0500 Subject: x86: Fix comments of register/stack access functions Fix typos and some redundant comments of register/stack access functions in asm/ptrace.h. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Roland McGrath Cc: Oleg Nesterov Cc: Wenji Huang Cc: Mahesh J Salgaonkar LKML-Reference: <20091201000222.7669.7477.stgit@harusame> Signed-off-by: Ingo Molnar Suggested-by: Wenji Huang diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index a3d49dd..3d11fd0 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -227,8 +227,8 @@ extern const char *regs_query_register_name(unsigned int offset); * @regs: pt_regs from which register value is gotten. * @offset: offset number of the register. * - * regs_get_register returns the value of a register whose offset from @regs - * is @offset. The @offset is the offset of the register in struct pt_regs. + * regs_get_register returns the value of a register. The @offset is the + * offset of the register in struct pt_regs address which specified by @regs. * If @offset is bigger than MAX_REG_OFFSET, this returns 0. */ static inline unsigned long regs_get_register(struct pt_regs *regs, @@ -244,7 +244,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, * @regs: pt_regs which contains kernel stack pointer. * @addr: address which is checked. * - * regs_within_kenel_stack() checks @addr is within the kernel stack page(s). + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). * If @addr is within the kernel stack, it returns true. If not, returns false. */ static inline int regs_within_kernel_stack(struct pt_regs *regs, @@ -260,7 +260,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * is specified by @regs. If the @n th entry is NOT in the kernel stack, * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, -- cgit v0.10.2 From fa1452e808732ae10e8b1267fd75fc2d028d634b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 30 Nov 2009 14:59:44 +0900 Subject: locking, task_struct: Reduce size on TRACE_IRQFLAGS and 64bit Reorder task_struct field for TRACE_IRQFLAGS to remove padding on 64-bit. Signed-off-by: Hiroshi Shimamoto Cc: Peter Zijlstra LKML-Reference: <4B135F50.8070302@ct.jp.nec.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 75e6e60..49be8f7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1421,17 +1421,17 @@ struct task_struct { #endif #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; - int hardirqs_enabled; unsigned long hardirq_enable_ip; - unsigned int hardirq_enable_event; unsigned long hardirq_disable_ip; + unsigned int hardirq_enable_event; unsigned int hardirq_disable_event; - int softirqs_enabled; + int hardirqs_enabled; + int hardirq_context; unsigned long softirq_disable_ip; - unsigned int softirq_disable_event; unsigned long softirq_enable_ip; + unsigned int softirq_disable_event; unsigned int softirq_enable_event; - int hardirq_context; + int softirqs_enabled; int softirq_context; #endif #ifdef CONFIG_LOCKDEP -- cgit v0.10.2 From bdddd2963c0264c56f18043f6fa829d3c1d3d1c0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 2 Dec 2009 14:09:16 +1030 Subject: sched: Fix isolcpus boot option Anton Blanchard wrote: > We allocate and zero cpu_isolated_map after the isolcpus > __setup option has run. This means cpu_isolated_map always > ends up empty and if CPUMASK_OFFSTACK is enabled we write to a > cpumask that hasn't been allocated. I introduced this regression in 49557e620339cb13 (sched: Fix boot crash by zalloc()ing most of the cpu masks). Use the bootmem allocator if they set isolcpus=, otherwise allocate and zero like normal. Reported-by: Anton Blanchard Signed-off-by: Rusty Russell Cc: peterz@infradead.org Cc: Linus Torvalds Cc: LKML-Reference: <200912021409.17013.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar Tested-by: Anton Blanchard diff --git a/kernel/sched.c b/kernel/sched.c index 1031cae..4883fee 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -8061,6 +8061,7 @@ static cpumask_var_t cpu_isolated_map; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) { + alloc_bootmem_cpumask_var(&cpu_isolated_map); cpulist_parse(str, cpu_isolated_map); return 1; } @@ -9609,7 +9610,9 @@ void __init sched_init(void) zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); #endif - zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); + /* May be allocated at isolcpus cmdline parse time */ + if (cpu_isolated_map == NULL) + zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); #endif /* SMP */ perf_event_init(); -- cgit v0.10.2 From be088b139f5244b23ee931afb195eee236b7ff33 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 24 Nov 2009 21:06:26 -0500 Subject: [WATCHDOG] rc32434_wdt.c: use resource_size() The size value passed to ioremap_nocache() is not correct. Use resource_size() to get the correct value. Signed-off-by: H Hartley Sweeten Cc: Phil Sutter Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c index f6cccc9..3764af1 100644 --- a/drivers/watchdog/rc32434_wdt.c +++ b/drivers/watchdog/rc32434_wdt.c @@ -276,7 +276,7 @@ static int __devinit rc32434_wdt_probe(struct platform_device *pdev) return -ENODEV; } - wdt_reg = ioremap_nocache(r->start, r->end - r->start); + wdt_reg = ioremap_nocache(r->start, resource_size(r)); if (!wdt_reg) { printk(KERN_ERR PFX "failed to remap I/O resources\n"); return -ENXIO; -- cgit v0.10.2 From 01be50a308be466e122c3a8b3d535f1b673ecbd2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 27 Nov 2009 15:04:58 +0000 Subject: x86/alternatives: Check replacementlen <= instrlen at build time Having run into the run-(boot-)time check a couple of times lately, I finally took time to find a build-time check so that one doesn't need to analyze the register/stack dump and resolve this (through manual lookup in vmlinux) to the offending construct. The assembler will emit a message like "Error: value of too large for field of 1 bytes at ", which while not pointing out the source location still makes analysis quite a bit easier. Signed-off-by: Jan Beulich LKML-Reference: <4B0FF8AA0200007800022703@vpn.id2.novell.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index c240efc..69b74a7 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -84,6 +84,7 @@ static inline void alternatives_smp_switch(int smp) {} " .byte " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* sourcelen */ \ " .byte 664f-663f\n" /* replacementlen */ \ + " .byte 0xff + (664f-663f) - (662b-661b)\n" /* rlen <= slen */ \ ".previous\n" \ ".section .altinstr_replacement, \"ax\"\n" \ "663:\n\t" newinstr "\n664:\n" /* replacement */ \ -- cgit v0.10.2 From 99063c0bcebcc913165a5d168050326eba3e0996 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 27 Nov 2009 15:06:16 +0000 Subject: x86/alternatives: No need for alternatives-asm.h to re-invent stuff already in asm.h This at once also gets the alignment specification right for x86-64. Signed-off-by: Jan Beulich LKML-Reference: <4B0FF8F80200007800022708@vpn.id2.novell.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index e2077d3..b97f786 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h @@ -1,17 +1,13 @@ #ifdef __ASSEMBLY__ -#ifdef CONFIG_X86_32 -# define X86_ALIGN .long -#else -# define X86_ALIGN .quad -#endif +#include #ifdef CONFIG_SMP .macro LOCK_PREFIX 1: lock .section .smp_locks,"a" - .align 4 - X86_ALIGN 1b + _ASM_ALIGN + _ASM_PTR 1b .previous .endm #else -- cgit v0.10.2 From be8147e68625a1adb111acfd6b98a492be4b74d0 Mon Sep 17 00:00:00 2001 From: Tim Blechmann Date: Wed, 2 Dec 2009 12:32:10 +0100 Subject: Revert "sched, x86: Optimize branch hint in __switch_to()" This reverts commit a3a1de0c34de6f5f8332cd6151c46af7813c0fcb. Commit 8ec6993d9f7d961014af970ded57542961fe9ad9 cleared the es and ds selectors, so the original branch hints are correct now. Therefore the branch hint doesn't need to be removed. Signed-off-by: Tim Blechmann LKML-Reference: <4B16503A.8030508@klingt.org> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 93c501d..eb62cbc 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -406,10 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * This won't pick up thread selector changes, but I guess that is ok. */ savesegment(es, prev->es); - if (next->es | prev->es) + if (unlikely(next->es | prev->es)) loadsegment(es, next->es); + savesegment(ds, prev->ds); - if (next->ds | prev->ds) + if (unlikely(next->ds | prev->ds)) loadsegment(ds, next->ds); -- cgit v0.10.2 From 810a90ae4d2a9f6d9ff80424accbd6f91947e981 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 2 Dec 2009 13:21:23 +0100 Subject: [PATCH] rc32434_wdt: fix compilation failure This patch fixes the compilation failure of rc32434 due to a bad module parameter description. Signed-off-by: Florian Fainelli Signed-off-by: Wim Van Sebroeck diff --git a/drivers/watchdog/rc32434_wdt.c b/drivers/watchdog/rc32434_wdt.c index 3764af1..bf12d06 100644 --- a/drivers/watchdog/rc32434_wdt.c +++ b/drivers/watchdog/rc32434_wdt.c @@ -62,7 +62,7 @@ extern unsigned int idt_cpu_freq; static int timeout = WATCHDOG_TIMEOUT; module_param(timeout, int, 0); MODULE_PARM_DESC(timeout, "Watchdog timeout value, in seconds (default=" - WATCHDOG_TIMEOUT ")"); + __MODULE_STRING(WATCHDOG_TIMEOUT) ")"); static int nowayout = WATCHDOG_NOWAYOUT; module_param(nowayout, int, 0); -- cgit v0.10.2 From 5d6b1edf8ccc4b7e4e77dff3fc80882833d6186e Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Tue, 1 Dec 2009 18:17:18 +0100 Subject: [ARM] pxamci: call mmc_remove_host() before freeing resources mmc_remove_host() will cause the mmc core to switch off the bus power by eventually calling pxamci_set_ios(). This function uses the regulator or the GPIO which have been freed already. This causes the following Oops on module unload. [ 49.519649] Unable to handle kernel paging request at virtual address 30303a70 [ 49.526878] pgd = c7084000 [ 49.529563] [30303a70] *pgd=00000000 [ 49.533136] Internal error: Oops: 5 [#1] [ 49.537025] last sysfs file: /sys/devices/platform/pxa27x-ohci/usb1/1-1/1-1:1.0/host0/target0:0:0/0:0:0:0/scsi_level [ 49.547471] Modules linked in: pxamci(-) eeti_ts [ 49.552061] CPU: 0 Not tainted (2.6.32-rc8 #322) [ 49.557001] PC is at regulator_is_enabled+0x3c/0xbc [ 49.561846] LR is at regulator_is_enabled+0x30/0xbc [ 49.566691] pc : [] lr : [] psr: 60000013 [ 49.566702] sp : c7083e70 ip : 30303a30 fp : 00000000 [ 49.578093] r10: c705e200 r9 : c7082000 r8 : c705e2e0 [ 49.583280] r7 : c7061340 r6 : c7061340 r5 : c7083e70 r4 : 00000000 [ 49.589759] r3 : c04dc434 r2 : c04dc434 r1 : c03eecea r0 : 00000047 [ 49.596241] Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user [ 49.603329] Control: 0000397f Table: a7084018 DAC: 00000015 [ 49.609031] Process rmmod (pid: 1101, stack limit = 0xc7082278) [ 49.614908] Stack: (0xc7083e70 to 0xc7084000) [ 49.619238] 3e60: c7082000 c703c4f8 c705ea00 c04f4074 [ 49.627366] 3e80: 00000000 c705e3a0 ffffffff c0247ddc c70361a0 00000000 c705e3a0 ffffffff [ 49.635499] 3ea0: c705e200 bf006400 c78c4f00 c705e200 c705e3a0 ffffffff c705e200 ffffffff [ 49.643633] 3ec0: c04d8ac8 c02476d0 ffffffff c0247c60 c705e200 c0248678 c705e200 c0249064 [ 49.651765] 3ee0: ffffffff bf006204 c04d8ad0 c04d8ad0 c04d8ac8 bf007490 00000880 c00440c4 [ 49.659898] 3f00: 0000b748 c01c5708 bf007490 c01c44c8 c04d8ac8 c04d8afc bf007490 c01c4570 [ 49.668031] 3f20: bf007490 bf00750c c04f4258 c01c37a4 00000000 bf00750c c7083f44 c007b014 [ 49.676162] 3f40: 4000d000 6d617870 08006963 00000001 00000000 c7085000 00000001 00000000 [ 49.684287] 3f60: 4000d000 c7083f8c 00000001 bea01a54 00005401 c7ab1400 c00440c4 00082000 [ 49.692420] 3f80: bf00750c 00000880 c7083f8c 00000000 4000cfa8 00000000 00000880 bea01cc8 [ 49.700552] 3fa0: 00000081 c0043f40 00000000 00000880 bea01cc8 00000880 00000006 00000000 [ 49.708677] 3fc0: 00000000 00000880 bea01cc8 00000081 00000097 0000cca4 0000b748 00000000 [ 49.716802] 3fe0: 4001a4f0 bea01cc0 00018bf4 4001a4fc 20000010 bea01cc8 a063e021 a063e421 [ 49.724958] [] (regulator_is_enabled+0x3c/0xbc) from [] (mmc_regulator_set_ocr+0x14/0xd8) [ 49.734836] [] (mmc_regulator_set_ocr+0x14/0xd8) from [] (pxamci_set_ios+0xd8/0x17c [pxamci]) [ 49.745044] [] (pxamci_set_ios+0xd8/0x17c [pxamci]) from [] (mmc_power_off+0x50/0x58) [ 49.754555] [] (mmc_power_off+0x50/0x58) from [] (mmc_detach_bus+0x68/0xc4) [ 49.763207] [] (mmc_detach_bus+0x68/0xc4) from [] (mmc_stop_host+0xd4/0x1bc) [ 49.771944] [] (mmc_stop_host+0xd4/0x1bc) from [] (mmc_remove_host+0xc/0x20) [ 49.780681] [] (mmc_remove_host+0xc/0x20) from [] (pxamci_remove+0xc8/0x174 [pxamci]) [ 49.790211] [] (pxamci_remove+0xc8/0x174 [pxamci]) from [] (platform_drv_remove+0x1c/0x24) [ 49.800164] [] (platform_drv_remove+0x1c/0x24) from [] (__device_release_driver+0x7c/0xc4) [ 49.810110] [] (__device_release_driver+0x7c/0xc4) from [] (driver_detach+0x60/0x8c) [ 49.819535] [] (driver_detach+0x60/0x8c) from [] (bus_remove_driver+0x90/0xcc) [ 49.828452] [] (bus_remove_driver+0x90/0xcc) from [] (sys_delete_module+0x1d8/0x254) [ 49.837891] [] (sys_delete_module+0x1d8/0x254) from [] (ret_fast_syscall+0x0/0x28) [ 49.847145] Code: eb06c53a e596c030 e1a0500d e59f106c (e59c0040) [ 49.853566] ---[ end trace b5fa66a00cea142f ]--- Signed-off-by: Daniel Mack Reported-by: Sven Neumann Cc: Pierre Ossman Cc: linux-mmc@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: stable@kernel.org Signed-off-by: Eric Miao diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index b00d673..9fb480b 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -760,6 +760,8 @@ static int pxamci_remove(struct platform_device *pdev) if (mmc) { struct pxamci_host *host = mmc_priv(mmc); + mmc_remove_host(mmc); + if (host->pdata) { gpio_cd = host->pdata->gpio_card_detect; gpio_ro = host->pdata->gpio_card_ro; @@ -779,8 +781,6 @@ static int pxamci_remove(struct platform_device *pdev) if (host->pdata && host->pdata->exit) host->pdata->exit(&pdev->dev, mmc); - mmc_remove_host(mmc); - pxamci_stop_clock(host); writel(TXFIFO_WR_REQ|RXFIFO_RD_REQ|CLK_IS_OFF|STOP_CMD| END_CMD_RES|PRG_DONE|DATA_TRAN_DONE, -- cgit v0.10.2 From d99ca3b977fc5a93141304f571475c2af9e6c1c5 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 2 Dec 2009 17:26:47 +0900 Subject: sched, cputime: Cleanups related to task_times() - Remove if({u,s}t)s because no one call it with NULL now. - Use cputime_{add,sub}(). - Add ifndef-endif for prev_{u,s}time since they are used only when !VIRT_CPU_ACCOUNTING. Signed-off-by: Hidetoshi Seto Cc: Peter Zijlstra Cc: Spencer Candland Cc: Americo Wang Cc: Oleg Nesterov Cc: Balbir Singh Cc: Stanislaw Gruszka LKML-Reference: <4B1624C7.7040302@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/include/linux/sched.h b/include/linux/sched.h index 0395b0f..dff85e5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1331,7 +1331,9 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING cputime_t prev_utime, prev_stime; +#endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ struct timespec real_start_time; /* boot based time */ diff --git a/kernel/fork.c b/kernel/fork.c index 166b8c4..ad7cb6d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1066,8 +1066,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->gtime = cputime_zero; p->utimescaled = cputime_zero; p->stimescaled = cputime_zero; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_utime = cputime_zero; p->prev_stime = cputime_zero; +#endif p->default_timer_slack_ns = current->timer_slack_ns; diff --git a/kernel/sched.c b/kernel/sched.c index 4883fee..17e2c1d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5184,10 +5184,8 @@ void account_idle_ticks(unsigned long ticks) #ifdef CONFIG_VIRT_CPU_ACCOUNTING void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - if (ut) - *ut = p->utime; - if (st) - *st = p->stime; + *ut = p->utime; + *st = p->stime; } #else @@ -5197,7 +5195,7 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) { - cputime_t rtime, utime = p->utime, total = utime + p->stime; + cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); /* * Use CFS's precise accounting: @@ -5217,12 +5215,10 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) * Compare with previous values, to keep monotonicity: */ p->prev_utime = max(p->prev_utime, utime); - p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); + p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime)); - if (ut) - *ut = p->prev_utime; - if (st) - *st = p->prev_stime; + *ut = p->prev_utime; + *st = p->prev_stime; } #endif -- cgit v0.10.2 From 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Wed, 2 Dec 2009 17:28:07 +0900 Subject: sched, cputime: Introduce thread_group_times() This is a real fix for problem of utime/stime values decreasing described in the thread: http://lkml.org/lkml/2009/11/3/522 Now cputime is accounted in the following way: - {u,s}time in task_struct are increased every time when the thread is interrupted by a tick (timer interrupt). - When a thread exits, its {u,s}time are added to signal->{u,s}time, after adjusted by task_times(). - When all threads in a thread_group exits, accumulated {u,s}time (and also c{u,s}time) in signal struct are added to c{u,s}time in signal struct of the group's parent. So {u,s}time in task struct are "raw" tick count, while {u,s}time and c{u,s}time in signal struct are "adjusted" values. And accounted values are used by: - task_times(), to get cputime of a thread: This function returns adjusted values that originates from raw {u,s}time and scaled by sum_exec_runtime that accounted by CFS. - thread_group_cputime(), to get cputime of a thread group: This function returns sum of all {u,s}time of living threads in the group, plus {u,s}time in the signal struct that is sum of adjusted cputimes of all exited threads belonged to the group. The problem is the return value of thread_group_cputime(), because it is mixed sum of "raw" value and "adjusted" value: group's {u,s}time = foreach(thread){{u,s}time} + exited({u,s}time) This misbehavior can break {u,s}time monotonicity. Assume that if there is a thread that have raw values greater than adjusted values (e.g. interrupted by 1000Hz ticks 50 times but only runs 45ms) and if it exits, cputime will decrease (e.g. -5ms). To fix this, we could do: group's {u,s}time = foreach(t){task_times(t)} + exited({u,s}time) But task_times() contains hard divisions, so applying it for every thread should be avoided. This patch fixes the above problem in the following way: - Modify thread's exit (= __exit_signal()) not to use task_times(). It means {u,s}time in signal struct accumulates raw values instead of adjusted values. As the result it makes thread_group_cputime() to return pure sum of "raw" values. - Introduce a new function thread_group_times(*task, *utime, *stime) that converts "raw" values of thread_group_cputime() to "adjusted" values, in same calculation procedure as task_times(). - Modify group's exit (= wait_task_zombie()) to use this introduced thread_group_times(). It make c{u,s}time in signal struct to have adjusted values like before this patch. - Replace some thread_group_cputime() by thread_group_times(). This replacements are only applied where conveys the "adjusted" cputime to users, and where already uses task_times() near by it. (i.e. sys_times(), getrusage(), and /proc//stat.) This patch have a positive side effect: - Before this patch, if a group contains many short-life threads (e.g. runs 0.9ms and not interrupted by ticks), the group's cputime could be invisible since thread's cputime was accumulated after adjusted: imagine adjustment function as adj(ticks, runtime), {adj(0, 0.9) + adj(0, 0.9) + ....} = {0 + 0 + ....} = 0. After this patch it will not happen because the adjustment is applied after accumulated. v2: - remove if()s, put new variables into signal_struct. Signed-off-by: Hidetoshi Seto Acked-by: Peter Zijlstra Cc: Spencer Candland Cc: Americo Wang Cc: Oleg Nesterov Cc: Balbir Singh Cc: Stanislaw Gruszka LKML-Reference: <4B162517.8040909@jp.fujitsu.com> Signed-off-by: Ingo Molnar diff --git a/fs/proc/array.c b/fs/proc/array.c index ca61a88..2571da4 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -506,7 +506,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, /* add up live thread stats at the group level */ if (whole) { - struct task_cputime cputime; struct task_struct *t = task; do { min_flt += t->min_flt; @@ -517,9 +516,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, min_flt += sig->min_flt; maj_flt += sig->maj_flt; - thread_group_cputime(task, &cputime); - utime = cputime.utime; - stime = cputime.stime; + thread_group_times(task, &utime, &stime); gtime = cputime_add(gtime, sig->gtime); } diff --git a/include/linux/sched.h b/include/linux/sched.h index dff85e5..34238bd 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -624,6 +624,9 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + cputime_t prev_utime, prev_stime; +#endif unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -1723,6 +1726,7 @@ static inline void put_task_struct(struct task_struct *t) } extern void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st); +extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st); /* * Per process flags diff --git a/kernel/exit.c b/kernel/exit.c index 2eaf68b..b221ad6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -91,8 +91,6 @@ static void __exit_signal(struct task_struct *tsk) if (atomic_dec_and_test(&sig->count)) posix_cpu_timers_exit_group(tsk); else { - cputime_t utime, stime; - /* * If there is any task waiting for the group exit * then notify it: @@ -112,9 +110,8 @@ static void __exit_signal(struct task_struct *tsk) * We won't ever get here for the group leader, since it * will have been the last reference on the signal_struct. */ - task_times(tsk, &utime, &stime); - sig->utime = cputime_add(sig->utime, utime); - sig->stime = cputime_add(sig->stime, stime); + sig->utime = cputime_add(sig->utime, tsk->utime); + sig->stime = cputime_add(sig->stime, tsk->stime); sig->gtime = cputime_add(sig->gtime, tsk->gtime); sig->min_flt += tsk->min_flt; sig->maj_flt += tsk->maj_flt; @@ -1208,6 +1205,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) struct signal_struct *psig; struct signal_struct *sig; unsigned long maxrss; + cputime_t tgutime, tgstime; /* * The resource counters for the group leader are in its @@ -1223,20 +1221,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) * need to protect the access to parent->signal fields, * as other threads in the parent group can be right * here reaping other children at the same time. + * + * We use thread_group_times() to get times for the thread + * group, which consolidates times for all threads in the + * group including the group leader. */ + thread_group_times(p, &tgutime, &tgstime); spin_lock_irq(&p->real_parent->sighand->siglock); psig = p->real_parent->signal; sig = p->signal; psig->cutime = cputime_add(psig->cutime, - cputime_add(p->utime, - cputime_add(sig->utime, - sig->cutime))); + cputime_add(tgutime, + sig->cutime)); psig->cstime = cputime_add(psig->cstime, - cputime_add(p->stime, - cputime_add(sig->stime, - sig->cstime))); + cputime_add(tgstime, + sig->cstime)); psig->cgtime = cputime_add(psig->cgtime, cputime_add(p->gtime, diff --git a/kernel/fork.c b/kernel/fork.c index ad7cb6d..3d6f121 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero; sig->gtime = cputime_zero; sig->cgtime = cputime_zero; +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + sig->prev_utime = sig->prev_stime = cputime_zero; +#endif sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0; sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0; sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 17e2c1d..e6ba726 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5187,6 +5187,16 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) *ut = p->utime; *st = p->stime; } + +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + struct task_cputime cputime; + + thread_group_cputime(p, &cputime); + + *ut = cputime.utime; + *st = cputime.stime; +} #else #ifndef nsecs_to_cputime @@ -5220,6 +5230,37 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) *ut = p->prev_utime; *st = p->prev_stime; } + +/* + * Must be called with siglock held. + */ +void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) +{ + struct signal_struct *sig = p->signal; + struct task_cputime cputime; + cputime_t rtime, utime, total; + + thread_group_cputime(p, &cputime); + + total = cputime_add(cputime.utime, cputime.stime); + rtime = nsecs_to_cputime(cputime.sum_exec_runtime); + + if (total) { + u64 temp; + + temp = (u64)(rtime * cputime.utime); + do_div(temp, total); + utime = (cputime_t)temp; + } else + utime = rtime; + + sig->prev_utime = max(sig->prev_utime, utime); + sig->prev_stime = max(sig->prev_stime, + cputime_sub(rtime, sig->prev_utime)); + + *ut = sig->prev_utime; + *st = sig->prev_stime; +} #endif /* diff --git a/kernel/sys.c b/kernel/sys.c index bbdfce0..9968c5f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -911,16 +911,15 @@ change_okay: void do_sys_times(struct tms *tms) { - struct task_cputime cputime; - cputime_t cutime, cstime; + cputime_t tgutime, tgstime, cutime, cstime; - thread_group_cputime(current, &cputime); spin_lock_irq(¤t->sighand->siglock); + thread_group_times(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; spin_unlock_irq(¤t->sighand->siglock); - tms->tms_utime = cputime_to_clock_t(cputime.utime); - tms->tms_stime = cputime_to_clock_t(cputime.stime); + tms->tms_utime = cputime_to_clock_t(tgutime); + tms->tms_stime = cputime_to_clock_t(tgstime); tms->tms_cutime = cputime_to_clock_t(cutime); tms->tms_cstime = cputime_to_clock_t(cstime); } @@ -1338,8 +1337,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; unsigned long flags; - cputime_t utime, stime; - struct task_cputime cputime; + cputime_t tgutime, tgstime, utime, stime; unsigned long maxrss = 0; memset((char *) r, 0, sizeof *r); @@ -1372,9 +1370,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) break; case RUSAGE_SELF: - thread_group_cputime(p, &cputime); - utime = cputime_add(utime, cputime.utime); - stime = cputime_add(stime, cputime.stime); + thread_group_times(p, &tgutime, &tgstime); + utime = cputime_add(utime, tgutime); + stime = cputime_add(stime, tgstime); r->ru_nvcsw += p->signal->nvcsw; r->ru_nivcsw += p->signal->nivcsw; r->ru_minflt += p->signal->min_flt; -- cgit v0.10.2 From a91be9ee69b09cdaa02afd5d7056bc2e6d382cfe Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 2 Dec 2009 11:33:03 +0000 Subject: MIPS: Fix MIPS I build. Broken by d63c63e889bbeeaa461a8addf1245f89f3ce4ece (lmo) rsp. f1e39a4a616cd9981a9decfd5332fd07a01abb8b (kernel.org). Signed-off-by: Ralf Baechle Patchwork: http://patchwork.linux-mips.org/patch/746/ diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 3fe1fcf..fe0d798 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -306,6 +306,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, if (cpu_has_llsc && R10000_LLSC_WAR) { __asm__ __volatile__ ( + " .set mips3 \n" " li %[err], 0 \n" "1: ll %[old], (%[addr]) \n" " move %[tmp], %[new] \n" @@ -320,6 +321,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, " "STR(PTR)" 1b, 4b \n" " "STR(PTR)" 2b, 4b \n" " .previous \n" + " .set mips0 \n" : [old] "=&r" (old), [err] "=&r" (err), [tmp] "=&r" (tmp) @@ -329,6 +331,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, : "memory"); } else if (cpu_has_llsc) { __asm__ __volatile__ ( + " .set mips3 \n" " li %[err], 0 \n" "1: ll %[old], (%[addr]) \n" " move %[tmp], %[new] \n" @@ -347,6 +350,7 @@ static inline int mips_atomic_set(struct pt_regs *regs, " "STR(PTR)" 1b, 5b \n" " "STR(PTR)" 2b, 5b \n" " .previous \n" + " .set mips0 \n" : [old] "=&r" (old), [err] "=&r" (err), [tmp] "=&r" (tmp) -- cgit v0.10.2 From 4e7c81af3fa076e2d1534cc665ea2f623e631f5d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 2 Dec 2009 13:07:01 +0100 Subject: MIPS: RB532: Fix devices.c compilation. We should now use dev_set_drvdata to set the driver driver_data field. Signed-off-by: Florian Fainelli Patchwork: http://patchwork.linux-mips.org/patch/747/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c index 9f40e1f..041fc1a 100644 --- a/arch/mips/rb532/devices.c +++ b/arch/mips/rb532/devices.c @@ -110,7 +110,6 @@ static struct korina_device korina_dev0_data = { static struct platform_device korina_dev0 = { .id = -1, .name = "korina", - .dev.driver_data = &korina_dev0_data, .resource = korina_dev0_res, .num_resources = ARRAY_SIZE(korina_dev0_res), }; @@ -332,6 +331,8 @@ static int __init plat_setup_devices(void) /* set the uart clock to the current cpu frequency */ rb532_uart_res[0].uartclk = idt_cpu_freq; + dev_set_drvdata(&korina_dev0.dev, &korina_dev0_data); + return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs)); } -- cgit v0.10.2 From e8092da92e1fd38dc7c38a4eeae93eaa21764584 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 30 Nov 2009 14:01:56 +0000 Subject: regulator: Initialise wm831x structure pointor for ISINK driver The version that made it into mainline missed the initialisation of the chip handle. Signed-off-by: Mark Brown Signed-off-by: Liam Girdwood diff --git a/drivers/regulator/wm831x-isink.c b/drivers/regulator/wm831x-isink.c index 1d8d987..4885700 100644 --- a/drivers/regulator/wm831x-isink.c +++ b/drivers/regulator/wm831x-isink.c @@ -167,6 +167,8 @@ static __devinit int wm831x_isink_probe(struct platform_device *pdev) return -ENOMEM; } + isink->wm831x = wm831x; + res = platform_get_resource(pdev, IORESOURCE_IO, 0); if (res == NULL) { dev_err(&pdev->dev, "No I/O resource\n"); -- cgit v0.10.2 From 35dead4235e2b67da7275b4122fed37099c2f462 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 3 Dec 2009 00:29:15 +0100 Subject: modules: don't export section names of empty sections via sysfs On the parisc architecture we face for each and every loaded kernel module this kernel "badness warning": sysfs: cannot create duplicate filename '/module/ac97_bus/sections/.text' Badness at fs/sysfs/dir.c:487 Reason for that is, that on parisc all kernel modules do have multiple .text sections due to the usage of the -ffunction-sections compiler flag which is needed to reach all jump targets on this platform. An objdump on such a kernel module gives: Sections: Idx Name Size VMA LMA File off Algn 0 .note.gnu.build-id 00000024 00000000 00000000 00000034 2**2 CONTENTS, ALLOC, LOAD, READONLY, DATA 1 .text 00000000 00000000 00000000 00000058 2**0 CONTENTS, ALLOC, LOAD, READONLY, CODE 2 .text.ac97_bus_match 0000001c 00000000 00000000 00000058 2**2 CONTENTS, ALLOC, LOAD, READONLY, CODE 3 .text 00000000 00000000 00000000 000000d4 2**0 CONTENTS, ALLOC, LOAD, READONLY, CODE ... Since the .text sections are empty (size of 0 bytes) and won't be loaded by the kernel module loader anyway, I don't see a reason why such sections need to be listed under /sys/module//sections/ either. The attached patch does solve this issue by not exporting section names which are empty. This fixes bugzilla http://bugzilla.kernel.org/show_bug.cgi?id=14703 Signed-off-by: Helge Deller CC: rusty@rustcorp.com.au CC: akpm@linux-foundation.org CC: James.Bottomley@HansenPartnership.com CC: roland@redhat.com CC: dave@hiauly1.hia.nrc.ca Signed-off-by: Linus Torvalds diff --git a/kernel/module.c b/kernel/module.c index 8b7d880..5842a71 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1187,7 +1187,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, /* Count loaded sections and allocate structures */ for (i = 0; i < nsect; i++) - if (sechdrs[i].sh_flags & SHF_ALLOC) + if (sechdrs[i].sh_flags & SHF_ALLOC + && sechdrs[i].sh_size) nloaded++; size[0] = ALIGN(sizeof(*sect_attrs) + nloaded * sizeof(sect_attrs->attrs[0]), @@ -1207,6 +1208,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect, for (i = 0; i < nsect; i++) { if (! (sechdrs[i].sh_flags & SHF_ALLOC)) continue; + if (!sechdrs[i].sh_size) + continue; sattr->address = sechdrs[i].sh_addr; sattr->name = kstrdup(secstrings + sechdrs[i].sh_name, GFP_KERNEL); -- cgit v0.10.2 From f066a4f6df68f03b565dfe867dde54dfeb26576e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 1 Dec 2009 14:56:44 +1030 Subject: param: don't complain about unused module parameters. Jon confirms that recent modprobe will look in /proc/cmdline, so these cmdline options can still be used. See http://bugzilla.kernel.org/show_bug.cgi?id=14164 Reported-by: Adam Williamson Cc: stable@kernel.org Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds diff --git a/init/main.c b/init/main.c index 5988deb..4051d75 100644 --- a/init/main.c +++ b/init/main.c @@ -251,7 +251,7 @@ early_param("loglevel", loglevel); /* * Unknown boot options get handed to init, unless they look like - * failed parameters + * unused parameters (modprobe will find them in /proc/cmdline). */ static int __init unknown_bootoption(char *param, char *val) { @@ -272,14 +272,9 @@ static int __init unknown_bootoption(char *param, char *val) if (obsolete_checksetup(param)) return 0; - /* - * Preemptive maintenance for "why didn't my misspelled command - * line work?" - */ - if (strchr(param, '.') && (!val || strchr(param, '.') < val)) { - printk(KERN_ERR "Unknown boot option `%s': ignoring\n", param); + /* Unused module parameter. */ + if (strchr(param, '.') && (!val || strchr(param, '.') < val)) return 0; - } if (panic_later) return 0; -- cgit v0.10.2 From 049e2d13b8e8a6d8be43e675a5ed9d4613819f65 Mon Sep 17 00:00:00 2001 From: Anisse Astier Date: Tue, 1 Dec 2009 01:14:25 -0800 Subject: Input: i8042 - add Dell Vostro 1320, 1520 and 1720 to the reset list These laptops often leave i8042 in a wierd state resulting in non- operational touchpad and keyboard. Signed-off-by: Anisse Astier Signed-off-by: Dmitry Torokhov Signed-off-by: Linus Torvalds diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index a537925..2bcf1ac 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -447,6 +447,27 @@ static struct dmi_system_id __initdata i8042_dmi_reset_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "N10"), }, }, + { + .ident = "Dell Vostro 1320", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1320"), + }, + }, + { + .ident = "Dell Vostro 1520", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1520"), + }, + }, + { + .ident = "Dell Vostro 1720", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 1720"), + }, + }, { } }; -- cgit v0.10.2 From dbf763a2f1c117cfe45bbbd2c874a150f0e0900b Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Thu, 3 Sep 2009 22:59:01 +0900 Subject: SPI: spi_txx9: Fix bit rate calculation TXx9 SPI bit rate is calculated by: fBR = (spi-baseclk) / (n + 1) Fix calculation of min_speed_hz, max_speed_hz and n. Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle diff --git a/drivers/spi/spi_txx9.c b/drivers/spi/spi_txx9.c index 96057de..19f7562 100644 --- a/drivers/spi/spi_txx9.c +++ b/drivers/spi/spi_txx9.c @@ -29,6 +29,8 @@ #define SPI_FIFO_SIZE 4 +#define SPI_MAX_DIVIDER 0xff /* Max. value for SPCR1.SER */ +#define SPI_MIN_DIVIDER 1 /* Min. value for SPCR1.SER */ #define TXx9_SPMCR 0x00 #define TXx9_SPCR0 0x04 @@ -193,11 +195,8 @@ static void txx9spi_work_one(struct txx9spi *c, struct spi_message *m) if (prev_speed_hz != speed_hz || prev_bits_per_word != bits_per_word) { - u32 n = (c->baseclk + speed_hz - 1) / speed_hz; - if (n < 1) - n = 1; - else if (n > 0xff) - n = 0xff; + int n = DIV_ROUND_UP(c->baseclk, speed_hz) - 1; + n = clamp(n, SPI_MIN_DIVIDER, SPI_MAX_DIVIDER); /* enter config mode */ txx9spi_wr(c, mcr | TXx9_SPMCR_CONFIG | TXx9_SPMCR_BCLR, TXx9_SPMCR); @@ -370,8 +369,8 @@ static int __init txx9spi_probe(struct platform_device *dev) goto exit; } c->baseclk = clk_get_rate(c->clk); - c->min_speed_hz = (c->baseclk + 0xff - 1) / 0xff; - c->max_speed_hz = c->baseclk; + c->min_speed_hz = DIV_ROUND_UP(c->baseclk, SPI_MAX_DIVIDER + 1); + c->max_speed_hz = c->baseclk / (SPI_MIN_DIVIDER + 1); res = platform_get_resource(dev, IORESOURCE_MEM, 0); if (!res) -- cgit v0.10.2 From 0fdd07f77fd9cc6a7d49076793daef06ea5d8f13 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 9 Aug 2009 11:42:32 +0200 Subject: VIDEO: Correct use of request_region/request_mem_region request_region should be used with release_region, not request_mem_region. Geert Uytterhoeven pointed out that in the case of drivers/video/gbefb.c, the problem is actually the other way around; request_mem_region should be used instead of request_region. The semantic patch that finds/fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @r1@ expression start; @@ request_region(start,...) @b1@ expression r1.start; @@ request_mem_region(start,...) @depends on !b1@ expression r1.start; expression E; @@ - release_mem_region + release_region (start,E) // Signed-off-by: Julia Lawall Signed-off-by: Ralf Baechle diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c index 1a83709..f67db42 100644 --- a/drivers/video/gbefb.c +++ b/drivers/video/gbefb.c @@ -1147,7 +1147,7 @@ static int __init gbefb_probe(struct platform_device *p_dev) gbefb_setup(options); #endif - if (!request_region(GBE_BASE, sizeof(struct sgi_gbe), "GBE")) { + if (!request_mem_region(GBE_BASE, sizeof(struct sgi_gbe), "GBE")) { printk(KERN_ERR "gbefb: couldn't reserve mmio region\n"); ret = -EBUSY; goto out_release_framebuffer; -- cgit v0.10.2 From 22763c5cf3690a681551162c15d34d935308c8d7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 2 Dec 2009 19:51:21 -0800 Subject: Linux 2.6.32 diff --git a/Makefile b/Makefile index ad82601..f5cdb72 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 32 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Man-Eating Seals of Antiquity # *DOCUMENTATION* -- cgit v0.10.2 From 7cff7ce94a7df2ccf5ac76b48ee0995fee2060df Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 9 Oct 2009 00:01:39 -0700 Subject: include/linux/compiler-gcc4.h: Fix build bug - gcc-4.0.2 doesn't understand __builtin_object_size Maybe 4.1.0 doesn't too, but this fixed it for me. Caused by: 4a31276: x86: Turn the copy_from_user check into an (optional) compile time warning 9f0cf4a: x86: Use __builtin_object_size() to validate the buffer size for copy_from_user() Signed-off-by: Andrew Morton Cc: Arjan van de Ven LKML-Reference: <200910090724.n997OQl6013538@imap1.linux-foundation.org> Signed-off-by: Ingo Molnar diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 77542c5..e6ef279 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -38,7 +38,9 @@ #endif +#if __GNUC_MINOR__ > 0 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0) +#endif #if __GNUC_MINOR__ >= 4 #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) -- cgit v0.10.2 From d3f6bad3911736e44ba11f3f3f6ac4e8c837fdfc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Dec 2009 12:10:13 -0800 Subject: rcu: Rename "quiet" functions The number of "quiet" functions has grown recently, and the names are no longer very descriptive. The point of all of these functions is to do some portion of the task of reporting a quiescent state, so rename them accordingly: o cpu_quiet() becomes rcu_report_qs_rdp(), which reports a quiescent state to the per-CPU rcu_data structure. If this turns out to be a new quiescent state for this grace period, then rcu_report_qs_rnp() will be invoked to propagate the quiescent state up the rcu_node hierarchy. o cpu_quiet_msk() becomes rcu_report_qs_rnp(), which reports a quiescent state for a given CPU (or possibly a set of CPUs) up the rcu_node hierarchy. o cpu_quiet_msk_finish() becomes rcu_report_qs_rsp(), which reports a full set of quiescent states to the global rcu_state structure. o task_quiet() becomes rcu_report_unblock_qs_rnp(), which reports a quiescent state due to a task exiting an RCU read-side critical section that had previously blocked in that same critical section. As indicated by the new name, this type of quiescent state is reported up the rcu_node hierarchy (using rcu_report_qs_rnp() to do so). Signed-off-by: Paul E. McKenney Acked-by: Josh Triplett Acked-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12597846163698-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4ca7e02..a9f5103 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -740,11 +740,13 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags) } /* - * Clean up after the prior grace period and let rcu_start_gp() start up - * the next grace period if one is needed. Note that the caller must - * hold rnp->lock, as required by rcu_start_gp(), which will release it. + * Report a full set of quiescent states to the specified rcu_state + * data structure. This involves cleaning up after the prior grace + * period and letting rcu_start_gp() start up the next grace period + * if one is needed. Note that the caller must hold rnp->lock, as + * required by rcu_start_gp(), which will release it. */ -static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) +static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); @@ -754,15 +756,16 @@ static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) } /* - * Similar to cpu_quiet(), for which it is a helper function. Allows - * a group of CPUs to be quieted at one go, though all the CPUs in the - * group must be represented by the same leaf rcu_node structure. - * That structure's lock must be held upon entry, and it is released - * before return. + * Similar to rcu_report_qs_rdp(), for which it is a helper function. + * Allows quiescent states for a group of CPUs to be reported at one go + * to the specified rcu_node structure, though all the CPUs in the group + * must be represented by the same rcu_node structure (which need not be + * a leaf rcu_node structure, though it often will be). That structure's + * lock must be held upon entry, and it is released before return. */ static void -cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, - unsigned long flags) +rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, + struct rcu_node *rnp, unsigned long flags) __releases(rnp->lock) { struct rcu_node *rnp_c; @@ -798,21 +801,23 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, /* * Get here if we are the last CPU to pass through a quiescent - * state for this grace period. Invoke cpu_quiet_msk_finish() + * state for this grace period. Invoke rcu_report_qs_rsp() * to clean up and start the next grace period if one is needed. */ - cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */ + rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ } /* - * Record a quiescent state for the specified CPU, which must either be - * the current CPU. The lastcomp argument is used to make sure we are - * still in the grace period of interest. We don't want to end the current - * grace period based on quiescent states detected in an earlier grace - * period! + * Record a quiescent state for the specified CPU to that CPU's rcu_data + * structure. This must be either called from the specified CPU, or + * called when the specified CPU is known to be offline (and when it is + * also known that no other CPU is concurrently trying to help the offline + * CPU). The lastcomp argument is used to make sure we are still in the + * grace period of interest. We don't want to end the current grace period + * based on quiescent states detected in an earlier grace period! */ static void -cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) +rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) { unsigned long flags; unsigned long mask; @@ -827,8 +832,8 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) * The race with GP start is resolved by the fact that we * hold the leaf rcu_node lock, so that the per-CPU bits * cannot yet be initialized -- so we would simply find our - * CPU's bit already cleared in cpu_quiet_msk() if this race - * occurred. + * CPU's bit already cleared in rcu_report_qs_rnp() if this + * race occurred. */ rdp->passed_quiesc = 0; /* try again later! */ spin_unlock_irqrestore(&rnp->lock, flags); @@ -846,7 +851,7 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) */ rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; - cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ + rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */ } } @@ -877,8 +882,11 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) if (!rdp->passed_quiesc) return; - /* Tell RCU we are done (but cpu_quiet() will be the judge of that). */ - cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); + /* + * Tell RCU we are done (but rcu_report_qs_rdp() will be the + * judge of that). + */ + rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); } #ifdef CONFIG_HOTPLUG_CPU @@ -968,13 +976,13 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) /* * We still hold the leaf rcu_node structure lock here, and * irqs are still disabled. The reason for this subterfuge is - * because invoking task_quiet() with ->onofflock held leads - * to deadlock. + * because invoking rcu_report_unblock_qs_rnp() with ->onofflock + * held leads to deadlock. */ spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ rnp = rdp->mynode; if (need_quiet) - task_quiet(rnp, flags); + rcu_report_unblock_qs_rnp(rnp, flags); else spin_unlock_irqrestore(&rnp->lock, flags); @@ -1164,8 +1172,8 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, } if (mask != 0 && rnp->completed == lastcomp) { - /* cpu_quiet_msk() releases rnp->lock. */ - cpu_quiet_msk(mask, rsp, rnp, flags); + /* rcu_report_qs_rnp() releases rnp->lock. */ + rcu_report_qs_rnp(mask, rsp, rnp, flags); continue; } spin_unlock_irqrestore(&rnp->lock, flags); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index a81188c..8bb03cb 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -306,7 +306,8 @@ long rcu_batches_completed(void); static void rcu_preempt_note_context_switch(int cpu); static int rcu_preempted_readers(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU -static void task_quiet(struct rcu_node *rnp, unsigned long flags); +static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, + unsigned long flags); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_RCU_CPU_STALL_DETECTOR static void rcu_print_task_stall(struct rcu_node *rnp); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 1d295c7..c9f0c97 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -167,7 +167,7 @@ static int rcu_preempted_readers(struct rcu_node *rnp) * irqs disabled, and this lock is released upon return, but irqs remain * disabled. */ -static void task_quiet(struct rcu_node *rnp, unsigned long flags) +static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) __releases(rnp->lock) { unsigned long mask; @@ -185,7 +185,7 @@ static void task_quiet(struct rcu_node *rnp, unsigned long flags) * or tasks were kicked up to root rcu_node due to * CPUs going offline. */ - cpu_quiet_msk_finish(&rcu_preempt_state, flags); + rcu_report_qs_rsp(&rcu_preempt_state, flags); return; } @@ -193,7 +193,7 @@ static void task_quiet(struct rcu_node *rnp, unsigned long flags) mask = rnp->grpmask; spin_unlock(&rnp->lock); /* irqs remain disabled. */ spin_lock(&rnp_p->lock); /* irqs already disabled. */ - cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags); + rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); } /* @@ -253,12 +253,12 @@ static void rcu_read_unlock_special(struct task_struct *t) /* * If this was the last task on the current list, and if * we aren't waiting on any CPUs, report the quiescent state. - * Note that task_quiet() releases rnp->lock. + * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. */ if (empty) spin_unlock_irqrestore(&rnp->lock, flags); else - task_quiet(rnp, flags); + rcu_report_unblock_qs_rnp(rnp, flags); } else { local_irq_restore(flags); } @@ -566,7 +566,7 @@ static int rcu_preempted_readers(struct rcu_node *rnp) #ifdef CONFIG_HOTPLUG_CPU /* Because preemptible RCU does not exist, no quieting of tasks. */ -static void task_quiet(struct rcu_node *rnp, unsigned long flags) +static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) { spin_unlock_irqrestore(&rnp->lock, flags); } -- cgit v0.10.2 From cf244dc01bf68e1ad338b82447f8686d24ea4435 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Dec 2009 12:10:14 -0800 Subject: rcu: Enable fourth level of TREE_RCU hierarchy Enable a fourth level of rcu_node hierarchy for TREE_RCU and TREE_PREEMPT_RCU. This is for stress-testing and experiemental purposes only, although in theory this would enable 16,777,216 CPUs on 64-bit systems, though only 1,048,576 CPUs on 32-bit systems. Normal experimental use of this fourth level will normally set CONFIG_RCU_FANOUT=2, requiring a 16-CPU system, though the more adventurous (and more fortunate) experimenters may wish to chose CONFIG_RCU_FANOUT=3 for 81-CPU systems or even CONFIG_RCU_FANOUT=4 for 256-CPU systems. Signed-off-by: Paul E. McKenney Acked-by: Josh Triplett Acked-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12597846161257-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a9f5103..d47e03e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -60,7 +60,8 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; NUM_RCU_LVL_0, /* root of hierarchy. */ \ NUM_RCU_LVL_1, \ NUM_RCU_LVL_2, \ - NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ + NUM_RCU_LVL_3, \ + NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \ }, \ .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ @@ -1877,6 +1878,9 @@ void __init rcu_init(void) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#if NUM_RCU_LVL_4 != 0 + printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n"); +#endif /* #if NUM_RCU_LVL_4 != 0 */ RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); __rcu_init_preempt(); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 8bb03cb..df2e0b6 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -34,10 +34,11 @@ * In practice, this has not been tested, so there is probably some * bug somewhere. */ -#define MAX_RCU_LVLS 3 +#define MAX_RCU_LVLS 4 #define RCU_FANOUT (CONFIG_RCU_FANOUT) #define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) #define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) +#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT # define NUM_RCU_LVLS 1 @@ -45,23 +46,33 @@ # define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_3 0 +# define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_SQ # define NUM_RCU_LVLS 2 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) # define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_3 0 +# define NUM_RCU_LVL_4 0 #elif NR_CPUS <= RCU_FANOUT_CUBE # define NUM_RCU_LVLS 3 # define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) # define NUM_RCU_LVL_3 NR_CPUS +# define NUM_RCU_LVL_4 0 +#elif NR_CPUS <= RCU_FANOUT_FOURTH +# define NUM_RCU_LVLS 4 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE) +# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) +# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) +# define NUM_RCU_LVL_4 NR_CPUS #else # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" #endif /* #if (NR_CPUS) <= RCU_FANOUT */ -#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) /* -- cgit v0.10.2 From d9a3da0699b24a589b27a61e1a5b5bd30d9db669 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Dec 2009 12:10:15 -0800 Subject: rcu: Add expedited grace-period support for preemptible RCU Implement an synchronize_rcu_expedited() for preemptible RCU that actually is expedited. This uses synchronize_sched_expedited() to force all threads currently running in a preemptible-RCU read-side critical section onto the appropriate ->blocked_tasks[] list, then takes a snapshot of all of these lists and waits for them to drain. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1259784616158-git-send-email-> Signed-off-by: Ingo Molnar diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 3dd0ca2..a621a67 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -327,6 +327,11 @@ rcu_torture_cb(struct rcu_head *p) cur_ops->deferred_free(rp); } +static int rcu_no_completed(void) +{ + return 0; +} + static void rcu_torture_deferred_free(struct rcu_torture *p) { call_rcu(&p->rtort_rcu, rcu_torture_cb); @@ -388,6 +393,21 @@ static struct rcu_torture_ops rcu_sync_ops = { .name = "rcu_sync" }; +static struct rcu_torture_ops rcu_expedited_ops = { + .init = rcu_sync_torture_init, + .cleanup = NULL, + .readlock = rcu_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = rcu_torture_read_unlock, + .completed = rcu_no_completed, + .deferred_free = rcu_sync_torture_deferred_free, + .sync = synchronize_rcu_expedited, + .cb_barrier = NULL, + .stats = NULL, + .irq_capable = 1, + .name = "rcu_expedited" +}; + /* * Definitions for rcu_bh torture testing. */ @@ -581,11 +601,6 @@ static void sched_torture_read_unlock(int idx) preempt_enable(); } -static int sched_torture_completed(void) -{ - return 0; -} - static void rcu_sched_torture_deferred_free(struct rcu_torture *p) { call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); @@ -602,7 +617,7 @@ static struct rcu_torture_ops sched_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sched_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = rcu_barrier_sched, @@ -617,7 +632,7 @@ static struct rcu_torture_ops sched_sync_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = sched_torture_synchronize, .cb_barrier = NULL, @@ -631,7 +646,7 @@ static struct rcu_torture_ops sched_expedited_ops = { .readlock = sched_torture_read_lock, .read_delay = rcu_read_delay, /* just reuse rcu's version. */ .readunlock = sched_torture_read_unlock, - .completed = sched_torture_completed, + .completed = rcu_no_completed, .deferred_free = rcu_sync_torture_deferred_free, .sync = synchronize_sched_expedited, .cb_barrier = NULL, @@ -1116,7 +1131,8 @@ rcu_torture_init(void) int cpu; int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = - { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, + { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, + &rcu_bh_ops, &rcu_bh_sync_ops, &srcu_ops, &srcu_expedited_ops, &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d47e03e..53ae959 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -948,7 +948,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) { unsigned long flags; unsigned long mask; - int need_quiet = 0; + int need_report = 0; struct rcu_data *rdp = rsp->rda[cpu]; struct rcu_node *rnp; @@ -967,7 +967,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) break; } if (rnp == rdp->mynode) - need_quiet = rcu_preempt_offline_tasks(rsp, rnp, rdp); + need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); else spin_unlock(&rnp->lock); /* irqs remain disabled. */ mask = rnp->grpmask; @@ -982,10 +982,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) */ spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ rnp = rdp->mynode; - if (need_quiet) + if (need_report & RCU_OFL_TASKS_NORM_GP) rcu_report_unblock_qs_rnp(rnp, flags); else spin_unlock_irqrestore(&rnp->lock, flags); + if (need_report & RCU_OFL_TASKS_EXP_GP) + rcu_report_exp_rnp(rsp, rnp); rcu_adopt_orphan_cbs(rsp); } @@ -1843,6 +1845,8 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp->level = i; INIT_LIST_HEAD(&rnp->blocked_tasks[0]); INIT_LIST_HEAD(&rnp->blocked_tasks[1]); + INIT_LIST_HEAD(&rnp->blocked_tasks[2]); + INIT_LIST_HEAD(&rnp->blocked_tasks[3]); } } } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index df2e0b6..d2a0046 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -104,8 +104,12 @@ struct rcu_node { /* an rcu_data structure, otherwise, each */ /* bit corresponds to a child rcu_node */ /* structure. */ + unsigned long expmask; /* Groups that have ->blocked_tasks[] */ + /* elements that need to drain to allow the */ + /* current expedited grace period to */ + /* complete (only for TREE_PREEMPT_RCU). */ unsigned long qsmaskinit; - /* Per-GP initialization for qsmask. */ + /* Per-GP initial value for qsmask & expmask. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ /* Only one bit will be set in this mask. */ int grplo; /* lowest-numbered CPU or group here. */ @@ -113,7 +117,7 @@ struct rcu_node { u8 grpnum; /* CPU/group number for next level up. */ u8 level; /* root is at level 0. */ struct rcu_node *parent; - struct list_head blocked_tasks[2]; + struct list_head blocked_tasks[4]; /* Tasks blocked in RCU read-side critsect. */ /* Grace period number (->gpnum) x blocked */ /* by tasks on the (x & 0x1) element of the */ @@ -128,6 +132,21 @@ struct rcu_node { for ((rnp) = &(rsp)->node[0]; \ (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) +/* + * Do a breadth-first scan of the non-leaf rcu_node structures for the + * specified rcu_state structure. Note that if there is a singleton + * rcu_node tree with but one rcu_node structure, this loop is a no-op. + */ +#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ + for ((rnp) = &(rsp)->node[0]; \ + (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++) + +/* + * Scan the leaves of the rcu_node hierarchy for the specified rcu_state + * structure. Note that if there is a singleton rcu_node tree with but + * one rcu_node structure, this loop -will- visit the rcu_node structure. + * It is still a leaf node, even if it is also the root node. + */ #define rcu_for_each_leaf_node(rsp, rnp) \ for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \ (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++) @@ -261,7 +280,7 @@ struct rcu_state { long gpnum; /* Current gp number. */ long completed; /* # of last completed gp. */ - /* End of fields guarded by root rcu_node's lock. */ + /* End of fields guarded by root rcu_node's lock. */ spinlock_t onofflock; /* exclude on/offline and */ /* starting new GP. Also */ @@ -293,6 +312,13 @@ struct rcu_state { #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ }; +/* Return values for rcu_preempt_offline_tasks(). */ + +#define RCU_OFL_TASKS_NORM_GP 0x1 /* Tasks blocking normal */ + /* GP were moved to root. */ +#define RCU_OFL_TASKS_EXP_GP 0x2 /* Tasks blocking expedited */ + /* GP were moved to root. */ + #ifdef RCU_TREE_NONCORE /* @@ -333,6 +359,9 @@ static void rcu_preempt_offline_cpu(int cpu); static void rcu_preempt_check_callbacks(int cpu); static void rcu_preempt_process_callbacks(void); void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); +#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ static int rcu_preempt_pending(int cpu); static int rcu_preempt_needs_cpu(int cpu); static void __cpuinit rcu_preempt_init_percpu_data(int cpu); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c9f0c97..37fbccd 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -24,12 +24,15 @@ * Paul E. McKenney */ +#include #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); +static int rcu_preempted_readers_exp(struct rcu_node *rnp); + /* * Tell them what RCU they are running. */ @@ -157,7 +160,10 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock); */ static int rcu_preempted_readers(struct rcu_node *rnp) { - return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); + int phase = rnp->gpnum & 0x1; + + return !list_empty(&rnp->blocked_tasks[phase]) || + !list_empty(&rnp->blocked_tasks[phase + 2]); } /* @@ -204,6 +210,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) static void rcu_read_unlock_special(struct task_struct *t) { int empty; + int empty_exp; unsigned long flags; struct rcu_node *rnp; int special; @@ -247,6 +254,8 @@ static void rcu_read_unlock_special(struct task_struct *t) spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty = !rcu_preempted_readers(rnp); + empty_exp = !rcu_preempted_readers_exp(rnp); + smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ list_del_init(&t->rcu_node_entry); t->rcu_blocked_node = NULL; @@ -259,6 +268,13 @@ static void rcu_read_unlock_special(struct task_struct *t) spin_unlock_irqrestore(&rnp->lock, flags); else rcu_report_unblock_qs_rnp(rnp, flags); + + /* + * If this was the last task on the expedited lists, + * then we need to report up the rcu_node hierarchy. + */ + if (!empty_exp && !rcu_preempted_readers_exp(rnp)) + rcu_report_exp_rnp(&rcu_preempt_state, rnp); } else { local_irq_restore(flags); } @@ -343,7 +359,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, int i; struct list_head *lp; struct list_head *lp_root; - int retval; + int retval = 0; struct rcu_node *rnp_root = rcu_get_root(rsp); struct task_struct *tp; @@ -353,7 +369,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, } WARN_ON_ONCE(rnp != rdp->mynode && (!list_empty(&rnp->blocked_tasks[0]) || - !list_empty(&rnp->blocked_tasks[1]))); + !list_empty(&rnp->blocked_tasks[1]) || + !list_empty(&rnp->blocked_tasks[2]) || + !list_empty(&rnp->blocked_tasks[3]))); /* * Move tasks up to root rcu_node. Rely on the fact that the @@ -361,8 +379,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, * rcu_nodes in terms of gp_num value. This fact allows us to * move the blocked_tasks[] array directly, element by element. */ - retval = rcu_preempted_readers(rnp); - for (i = 0; i < 2; i++) { + if (rcu_preempted_readers(rnp)) + retval |= RCU_OFL_TASKS_NORM_GP; + if (rcu_preempted_readers_exp(rnp)) + retval |= RCU_OFL_TASKS_EXP_GP; + for (i = 0; i < 4; i++) { lp = &rnp->blocked_tasks[i]; lp_root = &rnp_root->blocked_tasks[i]; while (!list_empty(lp)) { @@ -449,14 +470,159 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); +static long sync_rcu_preempt_exp_count; +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); + +/* + * Return non-zero if there are any tasks in RCU read-side critical + * sections blocking the current preemptible-RCU expedited grace period. + * If there is no preemptible-RCU expedited grace period currently in + * progress, returns zero unconditionally. + */ +static int rcu_preempted_readers_exp(struct rcu_node *rnp) +{ + return !list_empty(&rnp->blocked_tasks[2]) || + !list_empty(&rnp->blocked_tasks[3]); +} + +/* + * return non-zero if there is no RCU expedited grace period in progress + * for the specified rcu_node structure, in other words, if all CPUs and + * tasks covered by the specified rcu_node structure have done their bit + * for the current expedited grace period. Works only for preemptible + * RCU -- other RCU implementation use other means. + * + * Caller must hold sync_rcu_preempt_exp_mutex. + */ +static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) +{ + return !rcu_preempted_readers_exp(rnp) && + ACCESS_ONCE(rnp->expmask) == 0; +} + +/* + * Report the exit from RCU read-side critical section for the last task + * that queued itself during or before the current expedited preemptible-RCU + * grace period. This event is reported either to the rcu_node structure on + * which the task was queued or to one of that rcu_node structure's ancestors, + * recursively up the tree. (Calm down, calm down, we do the recursion + * iteratively!) + * + * Caller must hold sync_rcu_preempt_exp_mutex. + */ +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) +{ + unsigned long flags; + unsigned long mask; + + spin_lock_irqsave(&rnp->lock, flags); + for (;;) { + if (!sync_rcu_preempt_exp_done(rnp)) + break; + if (rnp->parent == NULL) { + wake_up(&sync_rcu_preempt_exp_wq); + break; + } + mask = rnp->grpmask; + spin_unlock(&rnp->lock); /* irqs remain disabled */ + rnp = rnp->parent; + spin_lock(&rnp->lock); /* irqs already disabled */ + rnp->expmask &= ~mask; + } + spin_unlock_irqrestore(&rnp->lock, flags); +} + +/* + * Snapshot the tasks blocking the newly started preemptible-RCU expedited + * grace period for the specified rcu_node structure. If there are no such + * tasks, report it up the rcu_node hierarchy. + * + * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. + */ +static void +sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) +{ + int must_wait; + + spin_lock(&rnp->lock); /* irqs already disabled */ + list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); + list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); + must_wait = rcu_preempted_readers_exp(rnp); + spin_unlock(&rnp->lock); /* irqs remain disabled */ + if (!must_wait) + rcu_report_exp_rnp(rsp, rnp); +} + /* - * Wait for an rcu-preempt grace period. We are supposed to expedite the - * grace period, but this is the crude slow compatability hack, so just - * invoke synchronize_rcu(). + * Wait for an rcu-preempt grace period, but expedite it. The basic idea + * is to invoke synchronize_sched_expedited() to push all the tasks to + * the ->blocked_tasks[] lists, move all entries from the first set of + * ->blocked_tasks[] lists to the second set, and finally wait for this + * second set to drain. */ void synchronize_rcu_expedited(void) { - synchronize_rcu(); + unsigned long flags; + struct rcu_node *rnp; + struct rcu_state *rsp = &rcu_preempt_state; + long snap; + int trycount = 0; + + smp_mb(); /* Caller's modifications seen first by other CPUs. */ + snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; + smp_mb(); /* Above access cannot bleed into critical section. */ + + /* + * Acquire lock, falling back to synchronize_rcu() if too many + * lock-acquisition failures. Of course, if someone does the + * expedited grace period for us, just leave. + */ + while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { + if (trycount++ < 10) + udelay(trycount * num_online_cpus()); + else { + synchronize_rcu(); + return; + } + if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) + goto mb_ret; /* Others did our work for us. */ + } + if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) + goto unlock_mb_ret; /* Others did our work for us. */ + + /* force all RCU readers onto blocked_tasks[]. */ + synchronize_sched_expedited(); + + spin_lock_irqsave(&rsp->onofflock, flags); + + /* Initialize ->expmask for all non-leaf rcu_node structures. */ + rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->expmask = rnp->qsmaskinit; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + } + + /* Snapshot current state of ->blocked_tasks[] lists. */ + rcu_for_each_leaf_node(rsp, rnp) + sync_rcu_preempt_exp_init(rsp, rnp); + if (NUM_RCU_NODES > 1) + sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); + + spin_unlock_irqrestore(&rsp->onofflock, flags); + + /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ + rnp = rcu_get_root(rsp); + wait_event(sync_rcu_preempt_exp_wq, + sync_rcu_preempt_exp_done(rnp)); + + /* Clean up and exit. */ + smp_mb(); /* ensure expedited GP seen before counter increment. */ + ACCESS_ONCE(sync_rcu_preempt_exp_count)++; +unlock_mb_ret: + mutex_unlock(&sync_rcu_preempt_exp_mutex); +mb_ret: + smp_mb(); /* ensure subsequent action seen after grace period. */ } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); @@ -655,6 +821,20 @@ void synchronize_rcu_expedited(void) } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Because preemptable RCU does not exist, there is never any need to + * report on tasks preempted in RCU read-side critical sections during + * expedited RCU grace periods. + */ +static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) +{ + return; +} + +#endif /* #ifdef CONFIG_HOTPLUG_CPU */ + /* * Because preemptable RCU does not exist, it never has any work to do. */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 1984cdc..9d2c884 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -157,6 +157,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) { long gpnum; int level = 0; + int phase; struct rcu_node *rnp; gpnum = rsp->gpnum; @@ -173,10 +174,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) seq_puts(m, "\n"); level = rnp->level; } - seq_printf(m, "%lx/%lx %c>%c %d:%d ^%d ", + phase = gpnum & 0x1; + seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ", rnp->qsmask, rnp->qsmaskinit, - "T."[list_empty(&rnp->blocked_tasks[gpnum & 1])], - "T."[list_empty(&rnp->blocked_tasks[!(gpnum & 1)])], + "T."[list_empty(&rnp->blocked_tasks[phase])], + "E."[list_empty(&rnp->blocked_tasks[phase + 2])], + "T."[list_empty(&rnp->blocked_tasks[!phase])], + "E."[list_empty(&rnp->blocked_tasks[!phase + 2])], rnp->grplo, rnp->grphi, rnp->grpnum); } seq_puts(m, "\n"); -- cgit v0.10.2 From 8bfb2f8e655b9d0c45fde679fcd5fd97e34513db Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 2 Dec 2009 12:10:16 -0800 Subject: rcu: Make RCU's CPU-stall detector be default The RCU_CPU_STALL_DETECTOR costs almost nothing and has located some bugs that might otherwise have been difficult to track down. Make it be default for the TREE RCU implementations. The vmlinux size impact is limited (on 64-bit x86 defconfig): text data bss dec hex filename 8440248 1260076 995588 10695912 a334e8 vmlinux.before 8440774 1260060 995588 10696422 a336e6 vmlinux.after +526 bytes - acceptable default cost. For RAM starved systems, TINY_RCU does not support CPU-stall detection and is much smaller, but then again it is a uniprocessor... Signed-off-by: Paul E. McKenney Acked-by: Lai Jiangshan Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12597846162906-git-send-email-> [ v2: added image size calculations to the changelog ] Signed-off-by: Ingo Molnar diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8911558..50e0e78 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -749,7 +749,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" depends on TREE_RCU || TREE_PREEMPT_RCU - default n + default y help This option causes RCU to printk information on which CPUs are delaying the current grace period, but only when -- cgit v0.10.2 From 4528752f49c1f4025473d12bc5fa9181085c3f22 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 2 Dec 2009 15:05:56 -0800 Subject: x86, Calgary IOMMU quirk: Find nearest matching Calgary while walking up the PCI tree On a multi-node x3950M2 system, there's a slight oddity in the PCI device tree for all secondary nodes: 30:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev e1) \-33:00.0 PCI bridge: IBM CalIOC2 PCI-E Root Port (rev 01) \-34:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 1078 (rev 04) ...as compared to the primary node: 00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev e1) \-01:00.0 VGA compatible controller: ATI Technologies Inc ES1000 (rev 02) 03:00.0 PCI bridge: IBM CalIOC2 PCI-E Root Port (rev 01) \-04:00.0 RAID bus controller: LSI Logic / Symbios Logic MegaRAID SAS 1078 (rev 04) In both nodes, the LSI RAID controller hangs off a CalIOC2 device, but on the secondary nodes, the BIOS hides the VGA device and substitutes the device tree ending with the disk controller. It would seem that Calgary devices don't necessarily appear at the top of the PCI tree, which means that the current code to find the Calgary IOMMU that goes with a particular device is buggy. Rather than walk all the way to the top of the PCI device tree and try to match bus number with Calgary descriptor, the code needs to examine each parent of the particular device; if it encounters a Calgary with a matching bus number, simply use that. Otherwise, we BUG() when the bus number of the Calgary doesn't match the bus number of whatever's at the top of the device tree. Extra note: This patch appears to work correctly for the x3950 that came before the x3950 M2. Signed-off-by: Darrick J. Wong Acked-by: Muli Ben-Yehuda Cc: FUJITA Tomonori Cc: Joerg Roedel Cc: Yinghai Lu Cc: Jon D. Mason Cc: Corinna Schultz Cc: LKML-Reference: <20091202230556.GG10295@tux1.beaverton.ibm.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 849a099..c563e4c 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -316,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) pdev = to_pci_dev(dev); + /* search up the device tree for an iommu */ pbus = pdev->bus; - - /* is the device behind a bridge? Look for the root bus */ - while (pbus->parent) + do { + tbl = pci_iommu(pbus); + if (tbl && tbl->it_busno == pbus->number) + break; + tbl = NULL; pbus = pbus->parent; - - tbl = pci_iommu(pbus); + } while (pbus); BUG_ON(tbl && (tbl->it_busno != pbus->number)); -- cgit v0.10.2 From c02260277e472095ffb3ad893be5eeab9dcefde3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2009 20:49:16 +0100 Subject: mutex: Better control mutex adaptive spinning config Introduce CONFIG_MUTEX_SPIN_ON_OWNER so that we can centralize in a single place the conditions that determine its definition and use. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra LKML-Reference: <1259783357-8542-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index d1f86db..88c92fb 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -197,3 +197,6 @@ config INLINE_WRITE_UNLOCK_IRQ config INLINE_WRITE_UNLOCK_IRQRESTORE def_bool !DEBUG_SPINLOCK && ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + +config MUTEX_SPIN_ON_OWNER + def_bool SMP && !DEBUG_MUTEXES && !HAVE_DEFAULT_NO_SPIN_MUTEXES diff --git a/kernel/mutex.c b/kernel/mutex.c index 947b3ad..632f04c 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -148,8 +148,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, preempt_disable(); mutex_acquire(&lock->dep_map, subclass, 0, ip); -#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \ - !defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES) + +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* * Optimistic spinning. * -- cgit v0.10.2 From c08f782985eed9959438368e84ce1d7f2ed03d95 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 2 Dec 2009 20:49:17 +0100 Subject: mutex: Fix missing conditions to build mutex_spin_on_owner() We don't need to build mutex_spin_on_owner() if we have CONFIG_DEBUG_MUTEXES or CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES as it won't be used under such configs. Use CONFIG_MUTEX_SPIN_ON_OWNER as it gathers all the necessary checks before building it. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra LKML-Reference: <1259783357-8542-2-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra diff --git a/kernel/sched.c b/kernel/sched.c index 3c11ae0..ec0af1f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5481,7 +5481,7 @@ need_resched_nonpreemptible: } EXPORT_SYMBOL(schedule); -#ifdef CONFIG_SMP +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER /* * Look out! "owner" is an entirely speculative pointer * access and not reliable. -- cgit v0.10.2 From 7e71c55ee73988d0cb61045660b899eaac23bf8f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 22 Sep 2009 10:56:16 +0100 Subject: GFS2: Fix potential race in glock code We need to be careful of the ordering between clearing the GLF_LOCK bit and scheduling the workqueue. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8b674b1..a3f90ad 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -672,12 +672,17 @@ out: return; out_sched: + clear_bit(GLF_LOCK, &gl->gl_flags); + smp_mb__after_clear_bit(); gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put_nolock(gl); + return; + out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - goto out; + smp_mb__after_clear_bit(); + return; } static void delete_work_func(struct work_struct *work) @@ -1375,10 +1380,11 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) handle_callback(gl, LM_ST_UNLOCKED, 0); nr--; } + clear_bit(GLF_LOCK, &gl->gl_flags); + smp_mb__after_clear_bit(); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put_nolock(gl); spin_unlock(&gl->gl_spin); - clear_bit(GLF_LOCK, &gl->gl_flags); spin_lock(&lru_lock); continue; } -- cgit v0.10.2 From f55073ff1eaf99f6b3bc62134a456638bca043a3 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 10:30:49 +0100 Subject: GFS2: Fix -o meta mounts for subsequent mounts (i.e. all but the first one) We have a long term plan to use the "-o meta" flag to GFS2 mounts to access the alternate root which is used to store metadata for a GFS2 filesystem. This will allow us to eventually remove support for the gfs2meta filesystem type (which is in any case just a "front end" to the gfs2 filesystem type with the meta/master root). Currently the "-o meta" option is only taken into account on the initial mount of the filesystem. Subsequent mounts of the same filesystem (i.e. on the same device) result in basically the same as bind mounting the root of the original mount. This patch changes that by using what is more or less a copy of get_sb_bdev() and extending it so that it will take into account the alternate root in all cases. The main difference is that we have to parse the mount options a bit earlier. We can then use them to select the appropriate root towards the end of the function. In addition this also fixes a bug where it was possible (but certainly not desirable) to set different ro/rw options for the meta root when mounted via the gfs2meta fs compared with the original mount. Signed-off-by: Steven Whitehouse Cc: Alexander Viro diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 52fb6c0..e5ee062 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1114,7 +1114,7 @@ void gfs2_online_uevent(struct gfs2_sbd *sdp) * Returns: errno */ -static int fill_super(struct super_block *sb, void *data, int silent) +static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent) { struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; @@ -1125,17 +1125,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); return -ENOMEM; } - - sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; - sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; - sdp->sd_args.ar_commit = 60; - sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT; - - error = gfs2_mount_args(sdp, &sdp->sd_args, data); - if (error) { - printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); - goto fail; - } + sdp->sd_args = *args; if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; @@ -1243,18 +1233,125 @@ fail: return error; } -static int gfs2_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static int set_gfs2_super(struct super_block *s, void *data) { - return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); + s->s_bdev = data; + s->s_dev = s->s_bdev->bd_dev; + + /* + * We set the bdi here to the queue backing, file systems can + * overwrite this in ->fill_super() + */ + s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + return 0; } -static int test_meta_super(struct super_block *s, void *ptr) +static int test_gfs2_super(struct super_block *s, void *ptr) { struct block_device *bdev = ptr; return (bdev == s->s_bdev); } +/** + * gfs2_get_sb - Get the GFS2 superblock + * @fs_type: The GFS2 filesystem type + * @flags: Mount flags + * @dev_name: The name of the device + * @data: The mount arguments + * @mnt: The vfsmnt for this mount + * + * Q. Why not use get_sb_bdev() ? + * A. We need to select one of two root directories to mount, independent + * of whether this is the initial, or subsequent, mount of this sb + * + * Returns: 0 or -ve on error + */ + +static int gfs2_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) +{ + struct block_device *bdev; + struct super_block *s; + fmode_t mode = FMODE_READ; + int error; + struct gfs2_args args; + struct gfs2_sbd *sdp; + + if (!(flags & MS_RDONLY)) + mode |= FMODE_WRITE; + + bdev = open_bdev_exclusive(dev_name, mode, fs_type); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + + /* + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting + */ + mutex_lock(&bdev->bd_fsfreeze_mutex); + if (bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = -EBUSY; + goto error_bdev; + } + s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev); + mutex_unlock(&bdev->bd_fsfreeze_mutex); + error = PTR_ERR(s); + if (IS_ERR(s)) + goto error_bdev; + + memset(&args, 0, sizeof(args)); + args.ar_quota = GFS2_QUOTA_DEFAULT; + args.ar_data = GFS2_DATA_DEFAULT; + args.ar_commit = 60; + args.ar_errors = GFS2_ERRORS_DEFAULT; + + error = gfs2_mount_args(&args, data); + if (error) { + printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); + if (s->s_root) + goto error_super; + deactivate_locked_super(s); + return error; + } + + if (s->s_root) { + error = -EBUSY; + if ((flags ^ s->s_flags) & MS_RDONLY) + goto error_super; + close_bdev_exclusive(bdev, mode); + } else { + char b[BDEVNAME_SIZE]; + + s->s_flags = flags; + s->s_mode = mode; + strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); + sb_set_blocksize(s, block_size(bdev)); + error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0); + if (error) { + deactivate_locked_super(s); + return error; + } + s->s_flags |= MS_ACTIVE; + bdev->bd_super = s; + } + + sdp = s->s_fs_info; + mnt->mnt_sb = s; + if (args.ar_meta) + mnt->mnt_root = dget(sdp->sd_master_dir); + else + mnt->mnt_root = dget(sdp->sd_root_dir); + return 0; + +error_super: + deactivate_locked_super(s); +error_bdev: + close_bdev_exclusive(bdev, mode); + return error; +} + static int set_meta_super(struct super_block *s, void *ptr) { return -EINVAL; @@ -1274,13 +1371,17 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, dev_name, error); return error; } - s = sget(&gfs2_fs_type, test_meta_super, set_meta_super, + s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); return PTR_ERR(s); } + if ((flags ^ s->s_flags) & MS_RDONLY) { + deactivate_locked_super(s); + return -EBUSY; + } sdp = s->s_fs_info; mnt->mnt_sb = s; mnt->mnt_root = dget(sdp->sd_master_dir); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 0ec3ec6..42e5458 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -106,13 +106,13 @@ static const match_table_t tokens = { /** * gfs2_mount_args - Parse mount options - * @sdp: - * @data: + * @args: The structure into which the parsed options will be written + * @options: The options to parse * * Return: errno */ -int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) +int gfs2_mount_args(struct gfs2_args *args, char *options) { char *o; int token; @@ -157,7 +157,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) break; case Opt_debug: if (args->ar_errors == GFS2_ERRORS_PANIC) { - fs_info(sdp, "-o debug and -o errors=panic " + printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " "are mutually exclusive.\n"); return -EINVAL; } @@ -210,7 +210,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) case Opt_commit: rv = match_int(&tmp[0], &args->ar_commit); if (rv || args->ar_commit <= 0) { - fs_info(sdp, "commit mount option requires a positive numeric argument\n"); + printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n"); return rv ? rv : -EINVAL; } break; @@ -219,7 +219,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) break; case Opt_err_panic: if (args->ar_debug) { - fs_info(sdp, "-o debug and -o errors=panic " + printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " "are mutually exclusive.\n"); return -EINVAL; } @@ -227,7 +227,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) break; case Opt_error: default: - fs_info(sdp, "invalid mount option: %s\n", o); + printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); return -EINVAL; } } @@ -1062,7 +1062,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) spin_lock(>->gt_spin); args.ar_commit = gt->gt_log_flush_secs; spin_unlock(>->gt_spin); - error = gfs2_mount_args(sdp, &args, data); + error = gfs2_mount_args(&args, data); if (error) return error; diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 235db36..ed962ea 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -27,7 +27,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) extern void gfs2_jindex_free(struct gfs2_sbd *sdp); -extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); +extern int gfs2_mount_args(struct gfs2_args *args, char *data); extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); extern int gfs2_jdesc_check(struct gfs2_jdesc *jd); -- cgit v0.10.2 From 2646a1f61a3b5525914757f10fa12b5b94713648 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 2 Oct 2009 11:50:54 +0100 Subject: GFS2: Fix up system xattrs This code has been shamelessly stolen from XFS at the suggestion of Christoph Hellwig. I've not added support for cached ACLs so far... watch for that in a later patch, although this is designed in such a way that they should be easy to add. Signed-off-by: Steven Whitehouse Cc: Christoph Hellwig diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 3fc4e3a..2168da1 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -26,61 +27,6 @@ #include "trans.h" #include "util.h" -#define ACL_ACCESS 1 -#define ACL_DEFAULT 0 - -int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, - struct gfs2_ea_request *er, int *remove, mode_t *mode) -{ - struct posix_acl *acl; - int error; - - error = gfs2_acl_validate_remove(ip, access); - if (error) - return error; - - if (!er->er_data) - return -EINVAL; - - acl = posix_acl_from_xattr(er->er_data, er->er_data_len); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (!acl) { - *remove = 1; - return 0; - } - - error = posix_acl_valid(acl); - if (error) - goto out; - - if (access) { - error = posix_acl_equiv_mode(acl, mode); - if (!error) - *remove = 1; - else if (error > 0) - error = 0; - } - -out: - posix_acl_release(acl); - return error; -} - -int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) -{ - if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl) - return -EOPNOTSUPP; - if (!is_owner_or_cap(&ip->i_inode)) - return -EPERM; - if (S_ISLNK(ip->i_inode.i_mode)) - return -EOPNOTSUPP; - if (!access && !S_ISDIR(ip->i_inode.i_mode)) - return -EACCES; - - return 0; -} - static int acl_get(struct gfs2_inode *ip, const char *name, struct posix_acl **acl, struct gfs2_ea_location *el, char **datap, unsigned int *lenp) @@ -277,3 +223,117 @@ out_brelse: return error; } +static int gfs2_acl_type(const char *name) +{ + if (strcmp(name, GFS2_POSIX_ACL_ACCESS) == 0) + return ACL_TYPE_ACCESS; + if (strcmp(name, GFS2_POSIX_ACL_DEFAULT) == 0) + return ACL_TYPE_DEFAULT; + return -EINVAL; +} + +static int gfs2_xattr_system_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + int type; + + type = gfs2_acl_type(name); + if (type < 0) + return type; + + return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); +} + +static int gfs2_set_mode(struct inode *inode, mode_t mode) +{ + int error = 0; + + if (mode != inode->i_mode) { + struct iattr iattr; + + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = mode; + + error = gfs2_setattr_simple(GFS2_I(inode), &iattr); + } + + return error; +} + +static int gfs2_xattr_system_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + struct gfs2_sbd *sdp = GFS2_SB(inode); + struct posix_acl *acl = NULL; + int error = 0, type; + + if (!sdp->sd_args.ar_posix_acl) + return -EOPNOTSUPP; + + type = gfs2_acl_type(name); + if (type < 0) + return type; + if (flags & XATTR_CREATE) + return -EINVAL; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return value ? -EACCES : 0; + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + if (!value) + goto set_acl; + + acl = posix_acl_from_xattr(value, size); + if (!acl) { + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + goto out; + } + if (IS_ERR(acl)) { + error = PTR_ERR(acl); + goto out; + } + + error = posix_acl_valid(acl); + if (error) + goto out_release; + + error = -EINVAL; + if (acl->a_count > GFS2_ACL_MAX_ENTRIES) + goto out_release; + + if (type == ACL_TYPE_ACCESS) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + + if (error <= 0) { + posix_acl_release(acl); + acl = NULL; + + if (error < 0) + return error; + } + + error = gfs2_set_mode(inode, mode); + if (error) + goto out_release; + } + +set_acl: + error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); +out_release: + posix_acl_release(acl); +out: + return error; +} + +struct xattr_handler gfs2_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .get = gfs2_xattr_system_get, + .set = gfs2_xattr_system_set, +}; + diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 6751930..cc95439 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -13,26 +13,12 @@ #include "incore.h" #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" -#define GFS2_POSIX_ACL_ACCESS_LEN 16 #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" -#define GFS2_POSIX_ACL_DEFAULT_LEN 17 +#define GFS2_ACL_MAX_ENTRIES 25 -#define GFS2_ACL_IS_ACCESS(name, len) \ - ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \ - !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len))) - -#define GFS2_ACL_IS_DEFAULT(name, len) \ - ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \ - !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len))) - -struct gfs2_ea_request; - -int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, - struct gfs2_ea_request *er, - int *remove, mode_t *mode); -int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access); -int gfs2_check_acl(struct inode *inode, int mask); -int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); -int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); +extern int gfs2_check_acl(struct inode *inode, int mask); +extern int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); +extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); +extern struct xattr_handler gfs2_xattr_system_handler; #endif /* __ACL_DOT_H__ */ diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 8a0f8ef..6b80354 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -1507,18 +1507,6 @@ static int gfs2_xattr_user_set(struct inode *inode, const char *name, return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags); } -static int gfs2_xattr_system_get(struct inode *inode, const char *name, - void *buffer, size_t size) -{ - return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); -} - -static int gfs2_xattr_system_set(struct inode *inode, const char *name, - const void *value, size_t size, int flags) -{ - return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags); -} - static int gfs2_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size) { @@ -1543,12 +1531,6 @@ static struct xattr_handler gfs2_xattr_security_handler = { .set = gfs2_xattr_security_set, }; -static struct xattr_handler gfs2_xattr_system_handler = { - .prefix = XATTR_SYSTEM_PREFIX, - .get = gfs2_xattr_system_get, - .set = gfs2_xattr_system_set, -}; - struct xattr_handler *gfs2_xattr_handlers[] = { &gfs2_xattr_user_handler, &gfs2_xattr_security_handler, -- cgit v0.10.2 From 796bd9524731850967d437b7f47a86acc776ea89 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 12:27:23 +0100 Subject: VFS: Add forget_all_cached_acls() This is required for cluster filesystems which want to use cached ACLs so that they can invalidate the cache when required. Signed-off-by: Steven Whitehouse Cc: Alexander Viro Cc: Christoph Hellwig diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 065a365..6760816 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -147,6 +147,20 @@ static inline void forget_cached_acl(struct inode *inode, int type) if (old != ACL_NOT_CACHED) posix_acl_release(old); } + +static inline void forget_all_cached_acls(struct inode *inode) +{ + struct posix_acl *old_access, *old_default; + spin_lock(&inode->i_lock); + old_access = inode->i_acl; + old_default = inode->i_default_acl; + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; + spin_unlock(&inode->i_lock); + if (old_access != ACL_NOT_CACHED) + posix_acl_release(old_access); + if (old_default != ACL_NOT_CACHED) + posix_acl_release(old_default); +} #endif static inline void cache_no_acl(struct inode *inode) -- cgit v0.10.2 From c65f7fb5342ecb8cb85e9b676327b3a43a5a4735 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 2 Oct 2009 11:54:39 +0100 Subject: GFS2: Use forget_all_cached_acls() Invalidate all the cached ACLs when we drop the glock. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 6985eef..78554ac 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -184,8 +185,10 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) if (flags & DIO_METADATA) { struct address_space *mapping = gl->gl_aspace->i_mapping; truncate_inode_pages(mapping, 0); - if (ip) + if (ip) { set_bit(GIF_INVALID, &ip->i_flags); + forget_all_cached_acls(&ip->i_inode); + } } if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) -- cgit v0.10.2 From 69dca42464962d8d0989b7e09877ba644c9cba66 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 12:40:19 +0100 Subject: GFS2: Use gfs2_set_mode() instead of munge_mode() These two functions do the same thing, so lets only use one of them. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 2168da1..1be3148 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -104,29 +104,20 @@ int gfs2_check_acl(struct inode *inode, int mask) return -EAGAIN; } -static int munge_mode(struct gfs2_inode *ip, mode_t mode) +static int gfs2_set_mode(struct inode *inode, mode_t mode) { - struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); - struct buffer_head *dibh; - int error; + int error = 0; - error = gfs2_trans_begin(sdp, RES_DINODE, 0); - if (error) - return error; + if (mode != inode->i_mode) { + struct iattr iattr; - error = gfs2_meta_inode_buffer(ip, &dibh); - if (!error) { - gfs2_assert_withdraw(sdp, - (ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT)); - ip->i_inode.i_mode = mode; - gfs2_trans_add_bh(ip->i_gl, dibh, 1); - gfs2_dinode_out(ip, dibh->b_data); - brelse(dibh); - } + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = mode; - gfs2_trans_end(sdp); + error = gfs2_setattr_simple(GFS2_I(inode), &iattr); + } - return 0; + return error; } int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) @@ -151,7 +142,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) if (!acl) { mode &= ~current_umask(); if (mode != ip->i_inode.i_mode) - error = munge_mode(ip, mode); + error = gfs2_set_mode(&ip->i_inode, mode); return error; } @@ -181,7 +172,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) if (error) goto out; munge: - error = munge_mode(ip, mode); + error = gfs2_set_mode(&ip->i_inode, mode); out: posix_acl_release(acl); kfree(data); @@ -244,21 +235,6 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name, return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); } -static int gfs2_set_mode(struct inode *inode, mode_t mode) -{ - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; - - iattr.ia_valid = ATTR_MODE; - iattr.ia_mode = mode; - - error = gfs2_setattr_simple(GFS2_I(inode), &iattr); - } - - return error; -} static int gfs2_xattr_system_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) -- cgit v0.10.2 From 479c427dd60fe1aadbbf2e6cbf2f84942baeb210 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 2 Oct 2009 12:00:00 +0100 Subject: GFS2: Clean up ACLs To prepare for support for caching of ACLs, this cleans up the GFS2 ACL support by pushing the xattr code back into xattr.c and changing the acl_get function into one which only returns ACLs so that we can drop the caching function into it shortly. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 1be3148..bd0fce9 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -27,53 +27,40 @@ #include "trans.h" #include "util.h" -static int acl_get(struct gfs2_inode *ip, const char *name, - struct posix_acl **acl, struct gfs2_ea_location *el, - char **datap, unsigned int *lenp) +static const char *gfs2_acl_name(int type) { - char *data; - unsigned int len; - int error; + switch (type) { + case ACL_TYPE_ACCESS: + return GFS2_POSIX_ACL_ACCESS; + case ACL_TYPE_DEFAULT: + return GFS2_POSIX_ACL_DEFAULT; + } + return NULL; +} - el->el_bh = NULL; +static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) +{ + struct posix_acl *acl; + const char *name; + char *data; + int len; if (!ip->i_eattr) - return 0; - - error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el); - if (error) - return error; - if (!el->el_ea) - return 0; - if (!GFS2_EA_DATA_LEN(el->el_ea)) - goto out; - - len = GFS2_EA_DATA_LEN(el->el_ea); - data = kmalloc(len, GFP_NOFS); - error = -ENOMEM; - if (!data) - goto out; + return NULL; - error = gfs2_ea_get_copy(ip, el, data, len); - if (error < 0) - goto out_kfree; - error = 0; + name = gfs2_acl_name(type); + if (name == NULL) + return ERR_PTR(-EINVAL); - if (acl) { - *acl = posix_acl_from_xattr(data, len); - if (IS_ERR(*acl)) - error = PTR_ERR(*acl); - } + len = gfs2_xattr_acl_get(ip, name, &data); + if (len < 0) + return ERR_PTR(len); + if (len == 0) + return NULL; -out_kfree: - if (error || !datap) { - kfree(data); - } else { - *datap = data; - *lenp = len; - } -out: - return error; + acl = posix_acl_from_xattr(data, len); + kfree(data); + return acl; } /** @@ -86,14 +73,12 @@ out: int gfs2_check_acl(struct inode *inode, int mask) { - struct gfs2_ea_location el; - struct posix_acl *acl = NULL; + struct posix_acl *acl; int error; - error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL); - brelse(el.el_bh); - if (error) - return error; + acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (acl) { error = posix_acl_permission(inode, acl, mask); @@ -120,32 +105,57 @@ static int gfs2_set_mode(struct inode *inode, mode_t mode) return error; } -int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) +static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) { - struct gfs2_ea_location el; - struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct posix_acl *acl = NULL, *clone; - mode_t mode = ip->i_inode.i_mode; - char *data = NULL; - unsigned int len; int error; + int len; + char *data; + const char *name = gfs2_acl_name(type); + + BUG_ON(name == NULL); + len = posix_acl_to_xattr(acl, NULL, 0); + if (len == 0) + return 0; + data = kmalloc(len, GFP_NOFS); + if (data == NULL) + return -ENOMEM; + error = posix_acl_to_xattr(acl, data, len); + if (error < 0) + goto out; + error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); +out: + kfree(data); + return error; +} + +int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) +{ + struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); + struct posix_acl *acl, *clone; + mode_t mode = inode->i_mode; + int error = 0; if (!sdp->sd_args.ar_posix_acl) return 0; - if (S_ISLNK(ip->i_inode.i_mode)) + if (S_ISLNK(inode->i_mode)) return 0; - error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len); - brelse(el.el_bh); - if (error) - return error; + acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (!acl) { mode &= ~current_umask(); - if (mode != ip->i_inode.i_mode) - error = gfs2_set_mode(&ip->i_inode, mode); + if (mode != inode->i_mode) + error = gfs2_set_mode(inode, mode); return error; } + if (S_ISDIR(inode->i_mode)) { + error = gfs2_acl_set(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto out; + } + clone = posix_acl_clone(acl, GFP_NOFS); error = -ENOMEM; if (!clone) @@ -153,43 +163,32 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) posix_acl_release(acl); acl = clone; - if (S_ISDIR(ip->i_inode.i_mode)) { - error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS, - GFS2_POSIX_ACL_DEFAULT, data, len, 0); - if (error) - goto out; - } - error = posix_acl_create_masq(acl, &mode); if (error < 0) goto out; if (error == 0) goto munge; - posix_acl_to_xattr(acl, data, len); - error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS, - GFS2_POSIX_ACL_ACCESS, data, len, 0); + error = gfs2_acl_set(inode, ACL_TYPE_ACCESS, acl); if (error) goto out; munge: - error = gfs2_set_mode(&ip->i_inode, mode); + error = gfs2_set_mode(inode, mode); out: posix_acl_release(acl); - kfree(data); return error; } int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) { - struct posix_acl *acl = NULL, *clone; - struct gfs2_ea_location el; + struct posix_acl *acl, *clone; char *data; unsigned int len; int error; - error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len); - if (error) - goto out_brelse; + acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); if (!acl) return gfs2_setattr_simple(ip, attr); @@ -202,15 +201,18 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) error = posix_acl_chmod_masq(acl, attr->ia_mode); if (!error) { + len = posix_acl_to_xattr(acl, NULL, 0); + data = kmalloc(len, GFP_NOFS); + error = -ENOMEM; + if (data == NULL) + goto out; posix_acl_to_xattr(acl, data, len); - error = gfs2_ea_acl_chmod(ip, &el, attr, data); + error = gfs2_xattr_acl_chmod(ip, attr, data); + kfree(data); } out: posix_acl_release(acl); - kfree(data); -out_brelse: - brelse(el.el_bh); return error; } diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index cc95439..9306a2e 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -17,7 +17,7 @@ #define GFS2_ACL_MAX_ENTRIES 25 extern int gfs2_check_acl(struct inode *inode, int mask); -extern int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); +extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); extern struct xattr_handler gfs2_xattr_system_handler; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index fb15d3b..6380cd9 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -871,7 +871,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock2; - error = gfs2_acl_create(dip, GFS2_I(inode)); + error = gfs2_acl_create(dip, inode); if (error) goto fail_gunlock2; diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 6b80354..912f5cb 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -186,8 +186,8 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, return 0; } -int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, - struct gfs2_ea_location *el) +static int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, + struct gfs2_ea_location *el) { struct ea_find ef; int error; @@ -516,8 +516,8 @@ out: return error; } -int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, - char *data, size_t size) +static int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, + char *data, size_t size) { int ret; size_t len = GFS2_EA_DATA_LEN(el->el_ea); @@ -534,6 +534,36 @@ int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, return len; } +int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata) +{ + struct gfs2_ea_location el; + int error; + int len; + char *data; + + error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, &el); + if (error) + return error; + if (!el.el_ea) + goto out; + if (!GFS2_EA_DATA_LEN(el.el_ea)) + goto out; + + len = GFS2_EA_DATA_LEN(el.el_ea); + data = kmalloc(len, GFP_NOFS); + error = -ENOMEM; + if (data == NULL) + goto out; + + error = gfs2_ea_get_copy(ip, &el, data, len); + if (error == 0) + error = len; + *ppdata = data; +out: + brelse(el.el_bh); + return error; +} + /** * gfs2_xattr_get - Get a GFS2 extended attribute * @inode: The inode @@ -1259,22 +1289,26 @@ fail: return error; } -int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, - struct iattr *attr, char *data) +int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) { + struct gfs2_ea_location el; struct buffer_head *dibh; int error; - if (GFS2_EA_IS_STUFFED(el->el_ea)) { + error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); + if (error) + return error; + + if (GFS2_EA_IS_STUFFED(el.el_ea)) { error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0); if (error) return error; - gfs2_trans_add_bh(ip->i_gl, el->el_bh, 1); - memcpy(GFS2_EA2DATA(el->el_ea), data, - GFS2_EA_DATA_LEN(el->el_ea)); + gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1); + memcpy(GFS2_EA2DATA(el.el_ea), data, + GFS2_EA_DATA_LEN(el.el_ea)); } else - error = ea_acl_chmod_unstuffed(ip, el->el_ea, data); + error = ea_acl_chmod_unstuffed(ip, el.el_ea, data); if (error) return error; diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h index cbdfd77..8d6ae58 100644 --- a/fs/gfs2/xattr.h +++ b/fs/gfs2/xattr.h @@ -62,11 +62,7 @@ extern int gfs2_ea_dealloc(struct gfs2_inode *ip); /* Exported to acl.c */ -extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name, - struct gfs2_ea_location *el); -extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, - char *data, size_t size); -extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el, - struct iattr *attr, char *data); +extern int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **data); +extern int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data); #endif /* __EATTR_DOT_H__ */ -- cgit v0.10.2 From 106381bfba997b83b64f68f2210e154162fc38e6 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 16:26:23 +0100 Subject: GFS2: Add cached ACLs support The other patches in this series have been building towards being able to support cached ACLs like other filesystems. The only real difference with GFS2 is that we have to invalidate the cache when we drop a glock, but that is dealt with in earlier patches. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index bd0fce9..3eb1ea8 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -48,6 +48,10 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) if (!ip->i_eattr) return NULL; + acl = get_cached_acl(&ip->i_inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + name = gfs2_acl_name(type); if (name == NULL) return ERR_PTR(-EINVAL); @@ -123,6 +127,8 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl) if (error < 0) goto out; error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0); + if (!error) + set_cached_acl(inode, type, acl); out: kfree(data); return error; @@ -209,6 +215,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) posix_acl_to_xattr(acl, data, len); error = gfs2_xattr_acl_chmod(ip, attr, data); kfree(data); + set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); } out: @@ -228,15 +235,25 @@ static int gfs2_acl_type(const char *name) static int gfs2_xattr_system_get(struct inode *inode, const char *name, void *buffer, size_t size) { + struct posix_acl *acl; int type; + int error; type = gfs2_acl_type(name); if (type < 0) return type; - return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size); -} + acl = gfs2_acl_get(GFS2_I(inode), type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} static int gfs2_xattr_system_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) @@ -303,6 +320,12 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name, set_acl: error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0); + if (!error) { + if (acl) + set_cached_acl(inode, type, acl); + else + forget_cached_acl(inode, type); + } out_release: posix_acl_release(acl); out: -- cgit v0.10.2 From ab201832f75f58c8f5093436363f80ffa4a4c9a8 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 29 Sep 2009 16:31:03 +0100 Subject: VFS: Use GFP_NOFS in posix_acl_from_xattr() GFS2 needs to call this from under a glock, so we need GFP_NOFS and I suspect that other filesystems might require this too. Signed-off-by: Steven Whitehouse diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c index c6ad7c7..05ac0fe 100644 --- a/fs/xattr_acl.c +++ b/fs/xattr_acl.c @@ -36,7 +36,7 @@ posix_acl_from_xattr(const void *value, size_t size) if (count == 0) return NULL; - acl = posix_acl_alloc(count, GFP_KERNEL); + acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); acl_e = acl->a_entries; -- cgit v0.10.2 From 8c42d637f6f2859e0fb28b78d5add7f0dc6d0973 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 11 Sep 2009 14:36:44 +0100 Subject: GFS2: Alter arguments of gfs2_quota/statfs_sync These two functions are altered so that gfs2_quota_sync may in future be called directly from the VFS. The GFS2 superblock changes to a VFS super block and there is an addition of an int argument which is currently ignored. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 2e9b932..ed9e197 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1069,8 +1069,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, } } -int gfs2_quota_sync(struct gfs2_sbd *sdp) +int gfs2_quota_sync(struct super_block *sb, int type) { + struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; unsigned int max_qd = gfs2_tune_get(sdp, gt_quota_simul_sync); unsigned int num_qd; @@ -1298,12 +1299,12 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error) } static void quotad_check_timeo(struct gfs2_sbd *sdp, const char *msg, - int (*fxn)(struct gfs2_sbd *sdp), + int (*fxn)(struct super_block *sb, int type), unsigned long t, unsigned long *timeo, unsigned int *new_timeo) { if (t >= *timeo) { - int error = fxn(sdp); + int error = fxn(sdp->sd_vfs, 0); quotad_error(sdp, msg, error); *timeo = gfs2_tune_get_i(&sdp->sd_tune, new_timeo) * HZ; } else { diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 0fa5fa6..437afa7 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -25,7 +25,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid); -extern int gfs2_quota_sync(struct gfs2_sbd *sdp); +extern int gfs2_quota_sync(struct super_block *sb, int type); extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); extern int gfs2_quota_init(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 42e5458..e7b24d5 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -484,8 +484,9 @@ void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); } -int gfs2_statfs_sync(struct gfs2_sbd *sdp) +int gfs2_statfs_sync(struct super_block *sb, int type) { + struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; @@ -712,8 +713,8 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int error; flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp); - gfs2_statfs_sync(sdp); + gfs2_quota_sync(sdp->sd_vfs, 0); + gfs2_statfs_sync(sdp->sd_vfs, 0); error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, &t_gh); diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index ed962ea..3df60f2 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h @@ -44,7 +44,7 @@ extern void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf); extern void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, struct buffer_head *l_bh); -extern int gfs2_statfs_sync(struct gfs2_sbd *sdp); +extern int gfs2_statfs_sync(struct super_block *sb, int type); extern int gfs2_freeze_fs(struct gfs2_sbd *sdp); extern void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 4463297..be1b8ac 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -158,7 +158,7 @@ static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_statfs_sync(sdp); + gfs2_statfs_sync(sdp->sd_vfs, 0); return len; } @@ -171,7 +171,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_quota_sync(sdp); + gfs2_quota_sync(sdp->sd_vfs, 0); return len; } -- cgit v0.10.2 From cc632e7f93465597896862cf9e50baefb1999215 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 09:59:02 +0100 Subject: GFS2: Hook gfs2_quota_sync into VFS via gfs2_quotactl_ops The plan is to add further operations to the gfs2_quotactl_ops in future patches. The sync operation is easy, so we start with that one. We plan to use the XFS quota control functions because they more closely match the GFS2 ones. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 5971359..4dcddf8 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -8,6 +8,8 @@ config GFS2_FS select FS_POSIX_ACL select CRC32 select SLOW_WORK + select QUOTA + select QUOTACTL help A cluster filesystem. diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e5ee062..36b11cb 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1138,6 +1139,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sb->s_op = &gfs2_super_ops; sb->s_export_op = &gfs2_export_ops; sb->s_xattr = gfs2_xattr_handlers; + sb->s_qcop = &gfs2_quotactl_ops; + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; sb->s_time_gran = 1; sb->s_maxbytes = MAX_LFS_FILESIZE; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ed9e197..73a43ce 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1378,3 +1378,7 @@ int gfs2_quotad(void *data) return 0; } +const struct quotactl_ops gfs2_quotactl_ops = { + .quota_sync = gfs2_quota_sync, +}; + diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 437afa7..025d15b 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -50,5 +50,6 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) } extern int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask); +extern const struct quotactl_ops gfs2_quotactl_ops; #endif /* __QUOTA_DOT_H__ */ -- cgit v0.10.2 From 91094d0fb650decd8bf48b85d86c892d7ca913ee Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 11 Sep 2009 15:21:56 +0100 Subject: GFS2: Remove obsolete code in quota.c There is no point in testing for GLF_DEMOTE here, we might as well always release the glock at that point. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index c609894..13f0bd2 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -180,15 +180,6 @@ static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl) return gl->gl_state == LM_ST_SHARED; } -static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) -{ - int ret; - spin_lock(&gl->gl_spin); - ret = test_bit(GLF_DEMOTE, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - return ret; -} - int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, const struct gfs2_glock_operations *glops, int create, struct gfs2_glock **glp); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 73a43ce..6aaa6c5 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -843,9 +843,8 @@ restart: if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { loff_t pos; gfs2_glock_dq_uninit(q_gh); - error = gfs2_glock_nq_init(qd->qd_gl, - LM_ST_EXCLUSIVE, GL_NOCACHE, - q_gh); + error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, + GL_NOCACHE, q_gh); if (error) return error; @@ -871,11 +870,9 @@ restart: qlvb->qb_value = cpu_to_be64(q.qu_value); qd->qd_qb = *qlvb; - if (gfs2_glock_is_blocking(qd->qd_gl)) { - gfs2_glock_dq_uninit(q_gh); - force_refresh = 0; - goto restart; - } + gfs2_glock_dq_uninit(q_gh); + force_refresh = 0; + goto restart; } return 0; -- cgit v0.10.2 From 1d371b5e179d943491a5fddad211cb317f38a142 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 11 Sep 2009 15:57:27 +0100 Subject: GFS2: Add get_xstate quota function This allows querying of the quota state via the XFS quota API. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 6aaa6c5..e7114be 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -1375,7 +1376,29 @@ int gfs2_quotad(void *data) return 0; } +static int gfs2_quota_get_xstate(struct super_block *sb, + struct fs_quota_stat *fqs) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + + memset(fqs, 0, sizeof(struct fs_quota_stat)); + fqs->qs_version = FS_QSTAT_VERSION; + if (sdp->sd_args.ar_quota == GFS2_QUOTA_ON) + fqs->qs_flags = (XFS_QUOTA_UDQ_ENFD | XFS_QUOTA_GDQ_ENFD); + else if (sdp->sd_args.ar_quota == GFS2_QUOTA_ACCOUNT) + fqs->qs_flags = (XFS_QUOTA_UDQ_ACCT | XFS_QUOTA_GDQ_ACCT); + if (sdp->sd_quota_inode) { + fqs->qs_uquota.qfs_ino = GFS2_I(sdp->sd_quota_inode)->i_no_addr; + fqs->qs_uquota.qfs_nblks = sdp->sd_quota_inode->i_blocks; + } + fqs->qs_uquota.qfs_nextents = 1; /* unsupported */ + fqs->qs_gquota = fqs->qs_uquota; /* its the same inode in both cases */ + fqs->qs_incoredqs = atomic_read(&qd_lru_count); + return 0; +} + const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, + .get_xstate = gfs2_quota_get_xstate, }; -- cgit v0.10.2 From ea7623385930c63e2a35749cff9db72094cd06ad Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 16:20:30 +0100 Subject: GFS2: Add proper error reporting to quota sync via sysfs For some reason, the errors were not making it to userspace. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index be1b8ac..c5dad1e 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -178,6 +178,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + int error; u32 id; if (!capable(CAP_SYS_ADMIN)) @@ -185,13 +186,14 @@ static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf, id = simple_strtoul(buf, NULL, 0); - gfs2_quota_refresh(sdp, 1, id); - return len; + error = gfs2_quota_refresh(sdp, 1, id); + return error ? error : len; } static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + int error; u32 id; if (!capable(CAP_SYS_ADMIN)) @@ -199,8 +201,8 @@ static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf, id = simple_strtoul(buf, NULL, 0); - gfs2_quota_refresh(sdp, 0, id); - return len; + error = gfs2_quota_refresh(sdp, 0, id); + return error ? error : len; } static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len) -- cgit v0.10.2 From 33a82529e7007ed7beceebc6b3f3cddadb5b67f0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 16:25:40 +0100 Subject: GFS2: Remove constant argument from qdsb_get() The "create" argument to qdsb_get() was only ever set to true, so this patch removes that argument. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e7114be..f790f5a 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -462,12 +462,12 @@ static void qd_unlock(struct gfs2_quota_data *qd) qd_put(qd); } -static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, int create, +static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, struct gfs2_quota_data **qdp) { int error; - error = qd_get(sdp, user, id, create, qdp); + error = qd_get(sdp, user, id, CREATE, qdp); if (error) return error; @@ -509,20 +509,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; - error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); if (error) goto out; al->al_qd_num++; qd++; - error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); if (error) goto out; al->al_qd_num++; qd++; if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { - error = qdsb_get(sdp, QUOTA_USER, uid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_USER, uid, qd); if (error) goto out; al->al_qd_num++; @@ -530,7 +530,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) } if (gid != NO_QUOTA_CHANGE && gid != ip->i_inode.i_gid) { - error = qdsb_get(sdp, QUOTA_GROUP, gid, CREATE, qd); + error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); if (error) goto out; al->al_qd_num++; -- cgit v0.10.2 From 6a6ada81e4ffc222bf7e54ea7503c7cc98b4f0d8 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 16:30:38 +0100 Subject: GFS2: Remove constant argument from qd_get() This function was only ever called with the "create" argument set to true, so we can remove it. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index f790f5a..db124af 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -165,7 +165,7 @@ fail: return error; } -static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, +static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, struct gfs2_quota_data **qdp) { struct gfs2_quota_data *qd = NULL, *new_qd = NULL; @@ -203,7 +203,7 @@ static int qd_get(struct gfs2_sbd *sdp, int user, u32 id, int create, spin_unlock(&qd_lru_lock); - if (qd || !create) { + if (qd) { if (new_qd) { gfs2_glock_put(new_qd->qd_gl); kmem_cache_free(gfs2_quotad_cachep, new_qd); @@ -467,7 +467,7 @@ static int qdsb_get(struct gfs2_sbd *sdp, int user, u32 id, { int error; - error = qd_get(sdp, user, id, CREATE, qdp); + error = qd_get(sdp, user, id, qdp); if (error) return error; @@ -1117,7 +1117,7 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) struct gfs2_holder q_gh; int error; - error = qd_get(sdp, user, id, CREATE, &qd); + error = qd_get(sdp, user, id, &qd); if (error) return error; -- cgit v0.10.2 From 1e72c0f7c40e665d2ed40014750fdd2fa9968bcf Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 15 Sep 2009 20:42:56 +0100 Subject: GFS2: Clean up gfs2_adjust_quota() and do_glock() Both of these functions contained confusing and in one case duplicate code. This patch adds a new check in do_glock() so that we report -ENOENT if we are asked to sync a quota entry which doesn't exist. Due to the previous patch this is now reported correctly to userspace. Also there are a few new comments, and I hope that the code is easier to understand now. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index db124af..33e369f 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -15,7 +15,7 @@ * fuzziness in the current usage value of IDs that are being used on different * nodes in the cluster simultaneously. So, it is possible for a user on * multiple nodes to overrun their quota, but that overrun is controlable. - * Since quota tags are part of transactions, there is no need to a quota check + * Since quota tags are part of transactions, there is no need for a quota check * program to be run on node crashes or anything like that. * * There are couple of knobs that let the administrator manage the quota @@ -66,13 +66,6 @@ #define QUOTA_USER 1 #define QUOTA_GROUP 0 -struct gfs2_quota_host { - u64 qu_limit; - u64 qu_warn; - s64 qu_value; - u32 qu_ll_next; -}; - struct gfs2_quota_change_host { u64 qc_change; u32 qc_flags; /* GFS2_QCF_... */ @@ -618,33 +611,19 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) mutex_unlock(&sdp->sd_quota_mutex); } -static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) -{ - const struct gfs2_quota *str = buf; - - qu->qu_limit = be64_to_cpu(str->qu_limit); - qu->qu_warn = be64_to_cpu(str->qu_warn); - qu->qu_value = be64_to_cpu(str->qu_value); - qu->qu_ll_next = be32_to_cpu(str->qu_ll_next); -} - -static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) -{ - struct gfs2_quota *str = buf; - - str->qu_limit = cpu_to_be64(qu->qu_limit); - str->qu_warn = cpu_to_be64(qu->qu_warn); - str->qu_value = cpu_to_be64(qu->qu_value); - str->qu_ll_next = cpu_to_be32(qu->qu_ll_next); - memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); -} - /** - * gfs2_adjust_quota + * gfs2_adjust_quota - adjust record of current block usage + * @ip: The quota inode + * @loc: Offset of the entry in the quota file + * @change: The amount of change to record + * @qd: The quota data * * This function was mostly borrowed from gfs2_block_truncate_page which was * in turn mostly borrowed from ext3 + * + * Returns: 0 or -ve on error */ + static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, s64 change, struct gfs2_quota_data *qd) { @@ -656,8 +635,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, struct buffer_head *bh; struct page *page; void *kaddr; - char *ptr; - struct gfs2_quota_host qp; + struct gfs2_quota *qp; s64 value; int err = -EIO; @@ -701,18 +679,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, gfs2_trans_add_bh(ip->i_gl, bh, 0); kaddr = kmap_atomic(page, KM_USER0); - ptr = kaddr + offset; - gfs2_quota_in(&qp, ptr); - qp.qu_value += change; - value = qp.qu_value; - gfs2_quota_out(&qp, ptr); + qp = kaddr + offset; + value = (s64)be64_to_cpu(qp->qu_value) + change; + qp->qu_value = cpu_to_be64(value); + qd->qd_qb.qb_value = qp->qu_value; flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); err = 0; - qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC); - qd->qd_qb.qb_value = cpu_to_be64(value); - ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC); - ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value); unlock: unlock_page(page); page_cache_release(page); @@ -741,8 +714,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); for (qx = 0; qx < num_qd; qx++) { - error = gfs2_glock_nq_init(qda[qx]->qd_gl, - LM_ST_EXCLUSIVE, + error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); if (error) goto out; @@ -797,8 +769,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) qd = qda[x]; offset = qd2offset(qd); error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, - (struct gfs2_quota_data *) - qd); + (struct gfs2_quota_data *)qd); if (error) goto out_end_trans; @@ -829,8 +800,7 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; - struct gfs2_quota_host q; - char buf[sizeof(struct gfs2_quota)]; + struct gfs2_quota q; int error; struct gfs2_quota_lvb *qlvb; @@ -853,22 +823,23 @@ restart: if (error) goto fail; - memset(buf, 0, sizeof(struct gfs2_quota)); + memset(&q, 0, sizeof(struct gfs2_quota)); pos = qd2offset(qd); - error = gfs2_internal_read(ip, NULL, buf, &pos, - sizeof(struct gfs2_quota)); + error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); if (error < 0) goto fail_gunlock; - + if ((error < sizeof(q)) && force_refresh) { + error = -ENOENT; + goto fail_gunlock; + } gfs2_glock_dq_uninit(&i_gh); - gfs2_quota_in(&q, buf); qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); qlvb->__pad = 0; - qlvb->qb_limit = cpu_to_be64(q.qu_limit); - qlvb->qb_warn = cpu_to_be64(q.qu_warn); - qlvb->qb_value = cpu_to_be64(q.qu_value); + qlvb->qb_limit = q.qu_limit; + qlvb->qb_warn = q.qu_warn; + qlvb->qb_value = q.qu_value; qd->qd_qb = *qlvb; gfs2_glock_dq_uninit(q_gh); @@ -1126,7 +1097,6 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) gfs2_glock_dq_uninit(&q_gh); qd_put(qd); - return error; } -- cgit v0.10.2 From 113d6b3c99bf30d8083068d00e3c7304d91d4845 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 11:52:16 +0100 Subject: GFS2: Add get_xquota support This adds support for viewing the current GFS2 quota settings via the XFS quota API. The setting of quotas will be addressed in a later patch. Fields which are not supported here are left set to zero. Signed-off-by: Steven Whitehouse Reviewed-by: Bob Peterson diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 33e369f..6c5d6aa7 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1367,8 +1367,51 @@ static int gfs2_quota_get_xstate(struct super_block *sb, return 0; } +static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_quota_lvb *qlvb; + struct gfs2_quota_data *qd; + struct gfs2_holder q_gh; + int error; + + memset(fdq, 0, sizeof(struct fs_disk_quota)); + + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + return -ESRCH; /* Crazy XFS error code */ + + if (type == USRQUOTA) + type = QUOTA_USER; + else if (type == GRPQUOTA) + type = QUOTA_GROUP; + else + return -EINVAL; + + error = qd_get(sdp, type, id, &qd); + if (error) + return error; + error = do_glock(qd, FORCE, &q_gh); + if (error) + goto out; + + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + fdq->d_version = FS_DQUOT_VERSION; + fdq->d_flags = (type == QUOTA_USER) ? XFS_USER_QUOTA : XFS_GROUP_QUOTA; + fdq->d_id = id; + fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); + fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); + fdq->d_bcount = be64_to_cpu(qlvb->qb_value); + + gfs2_glock_dq_uninit(&q_gh); +out: + qd_put(qd); + return error; +} + const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, .get_xstate = gfs2_quota_get_xstate, + .get_xquota = gfs2_xquota_get, }; -- cgit v0.10.2 From e285c100362762f7440643be637dd332460fdc75 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 23 Sep 2009 13:50:49 +0100 Subject: GFS2: Add set_xquota support This patch adds the ability to set GFS2 quota limit and warning levels via the XFS quota API. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 6c5d6aa7..e8db534 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -615,8 +615,9 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) * gfs2_adjust_quota - adjust record of current block usage * @ip: The quota inode * @loc: Offset of the entry in the quota file - * @change: The amount of change to record + * @change: The amount of usage change to record * @qd: The quota data + * @fdq: The updated limits to record * * This function was mostly borrowed from gfs2_block_truncate_page which was * in turn mostly borrowed from ext3 @@ -625,19 +626,21 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change) */ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, - s64 change, struct gfs2_quota_data *qd) + s64 change, struct gfs2_quota_data *qd, + struct fs_disk_quota *fdq) { struct inode *inode = &ip->i_inode; struct address_space *mapping = inode->i_mapping; unsigned long index = loc >> PAGE_CACHE_SHIFT; unsigned offset = loc & (PAGE_CACHE_SIZE - 1); unsigned blocksize, iblock, pos; - struct buffer_head *bh; + struct buffer_head *bh, *dibh; struct page *page; void *kaddr; struct gfs2_quota *qp; s64 value; int err = -EIO; + u64 size; if (gfs2_is_stuffed(ip)) gfs2_unstuff_dinode(ip, NULL); @@ -683,9 +686,34 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, value = (s64)be64_to_cpu(qp->qu_value) + change; qp->qu_value = cpu_to_be64(value); qd->qd_qb.qb_value = qp->qu_value; + if (fdq) { + if (fdq->d_fieldmask & FS_DQ_BSOFT) { + qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); + qd->qd_qb.qb_warn = qp->qu_warn; + } + if (fdq->d_fieldmask & FS_DQ_BHARD) { + qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); + qd->qd_qb.qb_limit = qp->qu_limit; + } + } flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - err = 0; + + err = gfs2_meta_inode_buffer(ip, &dibh); + if (err) + goto unlock; + + size = loc + sizeof(struct gfs2_quota); + if (size > inode->i_size) { + ip->i_disksize = size; + i_size_write(inode, size); + } + inode->i_mtime = inode->i_atime = CURRENT_TIME; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + mark_inode_dirty(inode); + unlock: unlock_page(page); page_cache_release(page); @@ -713,6 +741,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); + mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); for (qx = 0; qx < num_qd; qx++) { error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); @@ -768,8 +797,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) for (x = 0; x < num_qd; x++) { qd = qda[x]; offset = qd2offset(qd); - error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, - (struct gfs2_quota_data *)qd); + error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL); if (error) goto out_end_trans; @@ -789,20 +817,44 @@ out_gunlock: out: while (qx--) gfs2_glock_dq_uninit(&ghs[qx]); + mutex_unlock(&ip->i_inode.i_mutex); kfree(ghs); gfs2_log_flush(ip->i_gl->gl_sbd, ip->i_gl); return error; } +static int update_qd(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd) +{ + struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_quota q; + struct gfs2_quota_lvb *qlvb; + loff_t pos; + int error; + + memset(&q, 0, sizeof(struct gfs2_quota)); + pos = qd2offset(qd); + error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); + if (error < 0) + return error; + + qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; + qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); + qlvb->__pad = 0; + qlvb->qb_limit = q.qu_limit; + qlvb->qb_warn = q.qu_warn; + qlvb->qb_value = q.qu_value; + qd->qd_qb = *qlvb; + + return 0; +} + static int do_glock(struct gfs2_quota_data *qd, int force_refresh, struct gfs2_holder *q_gh) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); struct gfs2_holder i_gh; - struct gfs2_quota q; int error; - struct gfs2_quota_lvb *qlvb; restart: error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh); @@ -812,7 +864,6 @@ restart: qd->qd_qb = *(struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; if (force_refresh || qd->qd_qb.qb_magic != cpu_to_be32(GFS2_MAGIC)) { - loff_t pos; gfs2_glock_dq_uninit(q_gh); error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, q_gh); @@ -823,25 +874,11 @@ restart: if (error) goto fail; - memset(&q, 0, sizeof(struct gfs2_quota)); - pos = qd2offset(qd); - error = gfs2_internal_read(ip, NULL, (char *)&q, &pos, sizeof(q)); - if (error < 0) - goto fail_gunlock; - if ((error < sizeof(q)) && force_refresh) { - error = -ENOENT; + error = update_qd(sdp, qd); + if (error) goto fail_gunlock; - } - gfs2_glock_dq_uninit(&i_gh); - - qlvb = (struct gfs2_quota_lvb *)qd->qd_gl->gl_lvb; - qlvb->qb_magic = cpu_to_be32(GFS2_MAGIC); - qlvb->__pad = 0; - qlvb->qb_limit = q.qu_limit; - qlvb->qb_warn = q.qu_warn; - qlvb->qb_value = q.qu_value; - qd->qd_qb = *qlvb; + gfs2_glock_dq_uninit(&i_gh); gfs2_glock_dq_uninit(q_gh); force_refresh = 0; goto restart; @@ -1409,9 +1446,118 @@ out: return error; } +/* GFS2 only supports a subset of the XFS fields */ +#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) + +static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id, + struct fs_disk_quota *fdq) +{ + struct gfs2_sbd *sdp = sb->s_fs_info; + struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode); + struct gfs2_quota_data *qd; + struct gfs2_holder q_gh, i_gh; + unsigned int data_blocks, ind_blocks; + unsigned int blocks = 0; + int alloc_required; + struct gfs2_alloc *al; + loff_t offset; + int error; + + if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + return -ESRCH; /* Crazy XFS error code */ + + switch(type) { + case USRQUOTA: + type = QUOTA_USER; + if (fdq->d_flags != XFS_USER_QUOTA) + return -EINVAL; + break; + case GRPQUOTA: + type = QUOTA_GROUP; + if (fdq->d_flags != XFS_GROUP_QUOTA) + return -EINVAL; + break; + default: + return -EINVAL; + } + + if (fdq->d_fieldmask & ~GFS2_FIELDMASK) + return -EINVAL; + if (fdq->d_id != id) + return -EINVAL; + + error = qd_get(sdp, type, id, &qd); + if (error) + return error; + + mutex_lock(&ip->i_inode.i_mutex); + error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh); + if (error) + goto out_put; + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + goto out_q; + + /* Check for existing entry, if none then alloc new blocks */ + error = update_qd(sdp, qd); + if (error) + goto out_i; + + /* If nothing has changed, this is a no-op */ + if ((fdq->d_fieldmask & FS_DQ_BSOFT) && + (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) + fdq->d_fieldmask ^= FS_DQ_BSOFT; + if ((fdq->d_fieldmask & FS_DQ_BHARD) && + (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) + fdq->d_fieldmask ^= FS_DQ_BHARD; + if (fdq->d_fieldmask == 0) + goto out_i; + + offset = qd2offset(qd); + error = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota), + &alloc_required); + if (error) + goto out_i; + if (alloc_required) { + al = gfs2_alloc_get(ip); + if (al == NULL) + goto out_i; + gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), + &data_blocks, &ind_blocks); + blocks = al->al_requested = 1 + data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip); + if (error) + goto out_alloc; + } + + error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); + if (error) + goto out_release; + + /* Apply changes */ + error = gfs2_adjust_quota(ip, offset, 0, qd, fdq); + + gfs2_trans_end(sdp); +out_release: + if (alloc_required) { + gfs2_inplace_release(ip); +out_alloc: + gfs2_alloc_put(ip); + } +out_i: + gfs2_glock_dq_uninit(&i_gh); +out_q: + gfs2_glock_dq_uninit(&q_gh); +out_put: + mutex_unlock(&ip->i_inode.i_mutex); + qd_put(qd); + return error; +} + const struct quotactl_ops gfs2_quotactl_ops = { .quota_sync = gfs2_quota_sync, .get_xstate = gfs2_quota_get_xstate, .get_xquota = gfs2_xquota_get, + .set_xquota = gfs2_xquota_set, }; -- cgit v0.10.2 From 86e931a35e93d94e6e91b57cc76456e16d188ea9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 12:35:17 +0100 Subject: VFS: Export dquot_send_warning Sending a message to userspace in a generic format to warn of events (e.g. quota exceeded) in the quota subsystem is a generically useful feature. This patch makes some minor changes to the send_message function from dquot.c renaming it quota_send_message, moving it to quota.c and exporting it for use by filesystems which do not use the dquot code. Signed-off-by: Steven Whitehouse diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index 8047e01..353e78a 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig @@ -17,7 +17,7 @@ config QUOTA config QUOTA_NETLINK_INTERFACE bool "Report quota messages through netlink interface" - depends on QUOTA && NET + depends on QUOTACTL && NET help If you say Y here, quota warnings (about exceeding softlimit, reaching hardlimit, etc.) will be reported through netlink interface. If unsure, diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 39b49c4..9b6ad90 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -77,10 +77,6 @@ #include #include #include /* for inode_lock, oddly enough.. */ -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE -#include -#include -#endif #include @@ -1071,73 +1067,6 @@ static void print_warning(struct dquot *dquot, const int warntype) } #endif -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - -/* Netlink family structure for quota */ -static struct genl_family quota_genl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = "VFS_DQUOT", - .version = 1, - .maxattr = QUOTA_NL_A_MAX, -}; - -/* Send warning to userspace about user which exceeded quota */ -static void send_warning(const struct dquot *dquot, const char warntype) -{ - static atomic_t seq; - struct sk_buff *skb; - void *msg_head; - int ret; - int msg_size = 4 * nla_total_size(sizeof(u32)) + - 2 * nla_total_size(sizeof(u64)); - - /* We have to allocate using GFP_NOFS as we are called from a - * filesystem performing write and thus further recursion into - * the fs to free some data could cause deadlocks. */ - skb = genlmsg_new(msg_size, GFP_NOFS); - if (!skb) { - printk(KERN_ERR - "VFS: Not enough memory to send quota warning.\n"); - return; - } - msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), - "a_genl_family, 0, QUOTA_NL_C_WARNING); - if (!msg_head) { - printk(KERN_ERR - "VFS: Cannot store netlink header in quota warning.\n"); - goto err_out; - } - ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, dquot->dq_type); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, dquot->dq_id); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, - MAJOR(dquot->dq_sb->s_dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, - MINOR(dquot->dq_sb->s_dev)); - if (ret) - goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); - if (ret) - goto attr_err_out; - genlmsg_end(skb, msg_head); - - genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); - return; -attr_err_out: - printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); -err_out: - kfree_skb(skb); -} -#endif /* * Write warnings to the console and send warning messages over netlink. * @@ -1145,18 +1074,20 @@ err_out: */ static void flush_warnings(struct dquot *const *dquots, char *warntype) { + struct dquot *dq; int i; - for (i = 0; i < MAXQUOTAS; i++) - if (dquots[i] && warntype[i] != QUOTA_NL_NOWARN && - !warning_issued(dquots[i], warntype[i])) { + for (i = 0; i < MAXQUOTAS; i++) { + dq = dquots[i]; + if (dq && warntype[i] != QUOTA_NL_NOWARN && + !warning_issued(dq, warntype[i])) { #ifdef CONFIG_PRINT_QUOTA_WARNING - print_warning(dquots[i], warntype[i]); -#endif -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - send_warning(dquots[i], warntype[i]); + print_warning(dq, warntype[i]); #endif + quota_send_warning(dq->dq_type, dq->dq_id, + dq->dq_sb->s_dev, warntype[i]); } + } } static int ignore_hardlimit(struct dquot *dquot) @@ -2607,12 +2538,6 @@ static int __init dquot_init(void) register_shrinker(&dqcache_shrinker); -#ifdef CONFIG_QUOTA_NETLINK_INTERFACE - if (genl_register_family("a_genl_family) != 0) - printk(KERN_ERR - "VFS: Failed to create quota netlink interface.\n"); -#endif - return 0; } module_init(dquot_init); diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 95c5b42..ee91e27 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include /* Check validity of generic quotactl commands */ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, @@ -525,3 +527,94 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special, return ret; } #endif + + +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE + +/* Netlink family structure for quota */ +static struct genl_family quota_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "VFS_DQUOT", + .version = 1, + .maxattr = QUOTA_NL_A_MAX, +}; + +/** + * quota_send_warning - Send warning to userspace about exceeded quota + * @type: The quota type: USRQQUOTA, GRPQUOTA,... + * @id: The user or group id of the quota that was exceeded + * @dev: The device on which the fs is mounted (sb->s_dev) + * @warntype: The type of the warning: QUOTA_NL_... + * + * This can be used by filesystems (including those which don't use + * dquot) to send a message to userspace relating to quota limits. + * + */ + +void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + static atomic_t seq; + struct sk_buff *skb; + void *msg_head; + int ret; + int msg_size = 4 * nla_total_size(sizeof(u32)) + + 2 * nla_total_size(sizeof(u64)); + + /* We have to allocate using GFP_NOFS as we are called from a + * filesystem performing write and thus further recursion into + * the fs to free some data could cause deadlocks. */ + skb = genlmsg_new(msg_size, GFP_NOFS); + if (!skb) { + printk(KERN_ERR + "VFS: Not enough memory to send quota warning.\n"); + return; + } + msg_head = genlmsg_put(skb, 0, atomic_add_return(1, &seq), + "a_genl_family, 0, QUOTA_NL_C_WARNING); + if (!msg_head) { + printk(KERN_ERR + "VFS: Cannot store netlink header in quota warning.\n"); + goto err_out; + } + ret = nla_put_u32(skb, QUOTA_NL_A_QTYPE, type); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_EXCESS_ID, id); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_WARNING, warntype); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MAJOR, MAJOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u32(skb, QUOTA_NL_A_DEV_MINOR, MINOR(dev)); + if (ret) + goto attr_err_out; + ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); + if (ret) + goto attr_err_out; + genlmsg_end(skb, msg_head); + + genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); + return; +attr_err_out: + printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); +err_out: + kfree_skb(skb); +} +EXPORT_SYMBOL(quota_send_warning); + +static int __init quota_init(void) +{ + if (genl_register_family("a_genl_family) != 0) + printk(KERN_ERR + "VFS: Failed to create quota netlink interface.\n"); + return 0; +}; + +module_init(quota_init); +#endif + diff --git a/include/linux/quota.h b/include/linux/quota.h index 78c4889..ce9a9b2 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -376,6 +376,17 @@ static inline unsigned int dquot_generic_flag(unsigned int flags, int type) return flags >> _DQUOT_STATE_FLAGS; } +#ifdef CONFIG_QUOTA_NETLINK_INTERFACE +extern void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype); +#else +static inline void quota_send_warning(short type, unsigned int id, dev_t dev, + const char warntype) +{ + return; +} +#endif /* CONFIG_QUOTA_NETLINK_INTERFACE */ + struct quota_info { unsigned int flags; /* Flags for diskquotas on this device */ struct mutex dqio_mutex; /* lock device while I/O in progress */ -- cgit v0.10.2 From 2ec4650526c5a94d96bb760001fe0685b15988de Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 28 Sep 2009 12:49:15 +0100 Subject: GFS2: Use dquot_send_warning() This adds support to GFS2 to send quota warnings via netlink. Also it removes a stray \r which was left over from when the code used to print warnings on the console. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e8db534..1d4fc04 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include "gfs2.h" @@ -1001,7 +1002,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type) { struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; - printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\r\n", + printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", sdp->sd_fsname, type, (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group", qd->qd_id); @@ -1038,6 +1039,10 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) if (be64_to_cpu(qd->qd_qb.qb_limit) && (s64)be64_to_cpu(qd->qd_qb.qb_limit) < value) { print_message(qd, "exceeded"); + quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? + USRQUOTA : GRPQUOTA, qd->qd_id, + sdp->sd_vfs->s_dev, QUOTA_NL_BHARDWARN); + error = -EDQUOT; break; } else if (be64_to_cpu(qd->qd_qb.qb_warn) && @@ -1045,6 +1050,9 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) time_after_eq(jiffies, qd->qd_last_warn + gfs2_tune_get(sdp, gt_quota_warn_period) * HZ)) { + quota_send_warning(test_bit(QDF_USER, &qd->qd_flags) ? + USRQUOTA : GRPQUOTA, qd->qd_id, + sdp->sd_vfs->s_dev, QUOTA_NL_BSOFTWARN); error = print_message(qd, "warning"); qd->qd_last_warn = jiffies; } -- cgit v0.10.2 From 3d3c10f2ce80d2a19e5e02023c2b7ab7086c36d5 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 20 Oct 2009 02:39:44 -0500 Subject: GFS2: Improve statfs and quota usability GFS2 now has three new mount options, statfs_quantum, quota_quantum and statfs_percent. statfs_quantum and quota_quantum simply allow you to set the tunables of the same name. Setting setting statfs_quantum to 0 will also turn on the statfs_slow tunable. statfs_percent accepts an integer between 0 and 100. Numbers between 1 and 100 will cause GFS2 to do any early sync when the local number of blocks free changes by at least statfs_percent from the totoal number of blocks free. Setting statfs_percent to 0 disables this. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 6edb423..c239b0f 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -430,6 +430,9 @@ struct gfs2_args { unsigned int ar_discard:1; /* discard requests */ unsigned int ar_errors:2; /* errors=withdraw | panic */ int ar_commit; /* Commit interval */ + int ar_statfs_quantum; /* The fast statfs interval */ + int ar_quota_quantum; /* The quota interval */ + int ar_statfs_percent; /* The % change to force sync */ }; struct gfs2_tune { @@ -558,6 +561,7 @@ struct gfs2_sbd { spinlock_t sd_statfs_spin; struct gfs2_statfs_change_host sd_statfs_master; struct gfs2_statfs_change_host sd_statfs_local; + int sd_statfs_force_sync; /* Resource group stuff */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 36b11cb..9744ee9 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -63,13 +63,10 @@ static void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_quota_warn_period = 10; gt->gt_quota_scale_num = 1; gt->gt_quota_scale_den = 1; - gt->gt_quota_quantum = 60; gt->gt_new_files_jdata = 0; gt->gt_max_readahead = 1 << 18; gt->gt_stall_secs = 600; gt->gt_complain_secs = 10; - gt->gt_statfs_quantum = 30; - gt->gt_statfs_slow = 0; } static struct gfs2_sbd *init_sbd(struct super_block *sb) @@ -1153,6 +1150,15 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; + sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum; + if (sdp->sd_args.ar_statfs_quantum) { + sdp->sd_tune.gt_statfs_slow = 0; + sdp->sd_tune.gt_statfs_quantum = sdp->sd_args.ar_statfs_quantum; + } + else { + sdp->sd_tune.gt_statfs_slow = 1; + sdp->sd_tune.gt_statfs_quantum = 30; + } error = init_names(sdp, silent); if (error) @@ -1308,6 +1314,8 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags, args.ar_quota = GFS2_QUOTA_DEFAULT; args.ar_data = GFS2_DATA_DEFAULT; args.ar_commit = 60; + args.ar_statfs_quantum = 30; + args.ar_quota_quantum = 60; args.ar_errors = GFS2_ERRORS_DEFAULT; error = gfs2_mount_args(&args, data); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 1d4fc04..e3bf6ea 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1344,6 +1344,14 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp) } } +void gfs2_wake_up_statfs(struct gfs2_sbd *sdp) { + if (!sdp->sd_statfs_force_sync) { + sdp->sd_statfs_force_sync = 1; + wake_up(&sdp->sd_quota_wait); + } +} + + /** * gfs2_quotad - Write cached quota changes into the quota file * @sdp: Pointer to GFS2 superblock @@ -1363,8 +1371,15 @@ int gfs2_quotad(void *data) while (!kthread_should_stop()) { /* Update the master statfs file */ - quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, - &statfs_timeo, &tune->gt_statfs_quantum); + if (sdp->sd_statfs_force_sync) { + int error = gfs2_statfs_sync(sdp->sd_vfs, 0); + quotad_error(sdp, "statfs", error); + statfs_timeo = gfs2_tune_get(sdp, gt_statfs_quantum) * HZ; + } + else + quotad_check_timeo(sdp, "statfs", gfs2_statfs_sync, t, + &statfs_timeo, + &tune->gt_statfs_quantum); /* Update quota file */ quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, @@ -1381,7 +1396,7 @@ int gfs2_quotad(void *data) spin_lock(&sdp->sd_trunc_lock); empty = list_empty(&sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); - if (empty) + if (empty && !sdp->sd_statfs_force_sync) t -= schedule_timeout(t); else t = 0; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 025d15b..e271fa0 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -32,6 +32,8 @@ extern int gfs2_quota_init(struct gfs2_sbd *sdp); extern void gfs2_quota_cleanup(struct gfs2_sbd *sdp); extern int gfs2_quotad(void *data); +extern void gfs2_wake_up_statfs(struct gfs2_sbd *sdp); + static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e7b24d5..3fee2fd 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -70,6 +70,9 @@ enum { Opt_commit, Opt_err_withdraw, Opt_err_panic, + Opt_statfs_quantum, + Opt_statfs_percent, + Opt_quota_quantum, Opt_error, }; @@ -101,6 +104,9 @@ static const match_table_t tokens = { {Opt_commit, "commit=%d"}, {Opt_err_withdraw, "errors=withdraw"}, {Opt_err_panic, "errors=panic"}, + {Opt_statfs_quantum, "statfs_quantum=%d"}, + {Opt_statfs_percent, "statfs_percent=%d"}, + {Opt_quota_quantum, "quota_quantum=%d"}, {Opt_error, NULL} }; @@ -214,6 +220,28 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) return rv ? rv : -EINVAL; } break; + case Opt_statfs_quantum: + rv = match_int(&tmp[0], &args->ar_statfs_quantum); + if (rv || args->ar_statfs_quantum < 0) { + printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_quota_quantum: + rv = match_int(&tmp[0], &args->ar_quota_quantum); + if (rv || args->ar_quota_quantum <= 0) { + printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; + case Opt_statfs_percent: + rv = match_int(&tmp[0], &args->ar_statfs_percent); + if (rv || args->ar_statfs_percent < 0 || + args->ar_statfs_percent > 100) { + printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n"); + return rv ? rv : -EINVAL; + } + break; case Opt_err_withdraw: args->ar_errors = GFS2_ERRORS_WITHDRAW; break; @@ -442,7 +470,9 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, { struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; + struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct buffer_head *l_bh; + int percent, sync_percent; int error; error = gfs2_meta_inode_buffer(l_ip, &l_bh); @@ -456,9 +486,17 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, l_sc->sc_free += free; l_sc->sc_dinodes += dinodes; gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); + if (m_sc->sc_free) + percent = (100 * l_sc->sc_free) / m_sc->sc_free; + else + percent = 100; spin_unlock(&sdp->sd_statfs_spin); brelse(l_bh); + sync_percent = sdp->sd_args.ar_statfs_percent; + if (sync_percent && (percent >= sync_percent || + percent <= -sync_percent)) + gfs2_wake_up_statfs(sdp); } void update_statfs(struct gfs2_sbd *sdp, struct buffer_head *m_bh, @@ -522,6 +560,7 @@ int gfs2_statfs_sync(struct super_block *sb, int type) goto out_bh2; update_statfs(sdp, m_bh, l_bh); + sdp->sd_statfs_force_sync = 0; gfs2_trans_end(sdp); @@ -1062,6 +1101,11 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) spin_lock(>->gt_spin); args.ar_commit = gt->gt_log_flush_secs; + args.ar_quota_quantum = gt->gt_quota_quantum; + if (gt->gt_statfs_slow) + args.ar_statfs_quantum = 0; + else + args.ar_statfs_quantum = gt->gt_statfs_quantum; spin_unlock(>->gt_spin); error = gfs2_mount_args(&args, data); if (error) @@ -1100,6 +1144,15 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_POSIXACL; spin_lock(>->gt_spin); gt->gt_log_flush_secs = args.ar_commit; + gt->gt_quota_quantum = args.ar_quota_quantum; + if (args.ar_statfs_quantum) { + gt->gt_statfs_slow = 0; + gt->gt_statfs_quantum = args.ar_statfs_quantum; + } + else { + gt->gt_statfs_slow = 1; + gt->gt_statfs_quantum = 30; + } spin_unlock(>->gt_spin); gfs2_online_uevent(sdp); @@ -1180,7 +1233,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) { struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; - int lfsecs; + int val; if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) seq_printf(s, ",meta"); @@ -1241,9 +1294,17 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } if (args->ar_discard) seq_printf(s, ",discard"); - lfsecs = sdp->sd_tune.gt_log_flush_secs; - if (lfsecs != 60) - seq_printf(s, ",commit=%d", lfsecs); + val = sdp->sd_tune.gt_log_flush_secs; + if (val != 60) + seq_printf(s, ",commit=%d", val); + val = sdp->sd_tune.gt_statfs_quantum; + if (val != 30) + seq_printf(s, ",statfs_quantum=%d", val); + val = sdp->sd_tune.gt_quota_quantum; + if (val != 60) + seq_printf(s, ",quota_quantum=%d", val); + if (args->ar_statfs_percent) + seq_printf(s, ",statfs_percent=%d", args->ar_statfs_percent); if (args->ar_errors != GFS2_ERRORS_DEFAULT) { const char *state; -- cgit v0.10.2 From c14f5735e724cb5338ca8298d42b1658008a10d7 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Mon, 26 Oct 2009 13:29:47 -0500 Subject: GFS2: remove division from new statfs code It's not necessary to do any 64bit division for the statfs sync code, so remove it. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 3fee2fd..b1dcfab 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -472,7 +472,8 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local; struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; struct buffer_head *l_bh; - int percent, sync_percent; + s64 x, y; + int need_sync = 0; int error; error = gfs2_meta_inode_buffer(l_ip, &l_bh); @@ -486,16 +487,16 @@ void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, l_sc->sc_free += free; l_sc->sc_dinodes += dinodes; gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode)); - if (m_sc->sc_free) - percent = (100 * l_sc->sc_free) / m_sc->sc_free; - else - percent = 100; + if (sdp->sd_args.ar_statfs_percent) { + x = 100 * l_sc->sc_free; + y = m_sc->sc_free * sdp->sd_args.ar_statfs_percent; + if (x >= y || x <= -y) + need_sync = 1; + } spin_unlock(&sdp->sd_statfs_spin); brelse(l_bh); - sync_percent = sdp->sd_args.ar_statfs_percent; - if (sync_percent && (percent >= sync_percent || - percent <= -sync_percent)) + if (need_sync) gfs2_wake_up_statfs(sdp); } -- cgit v0.10.2 From f25934c5f88655a8d5c3c40a540daed1f0e6dedc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Oct 2009 08:03:27 +0100 Subject: GFS2: add barrier/nobarrier mount options Currently gfs2 issues barrier unconditionally. There are various reasons to disable them, be that just for testing or for stupid devices flushing large battert backed caches. Add a nobarrier option that matches xfs and btrfs for this. Also add a symmetric barrier option to turn it back on at remount time. Signed-off-by: Christoph Hellwig Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c239b0f..4792200 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -429,6 +429,7 @@ struct gfs2_args { unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ unsigned int ar_errors:2; /* errors=withdraw | panic */ + unsigned int ar_nobarrier:1; /* do not send barriers */ int ar_commit; /* Commit interval */ int ar_statfs_quantum; /* The fast statfs interval */ int ar_quota_quantum; /* The quota interval */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 9744ee9..edfee24 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1131,6 +1131,8 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; + if (sdp->sd_args.ar_nobarrier) + set_bit(SDF_NOBARRIERS, &sdp->sd_flags); sb->s_magic = GFS2_MAGIC; sb->s_op = &gfs2_super_ops; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b1dcfab..5e4b314 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -73,6 +73,8 @@ enum { Opt_statfs_quantum, Opt_statfs_percent, Opt_quota_quantum, + Opt_barrier, + Opt_nobarrier, Opt_error, }; @@ -107,6 +109,8 @@ static const match_table_t tokens = { {Opt_statfs_quantum, "statfs_quantum=%d"}, {Opt_statfs_percent, "statfs_percent=%d"}, {Opt_quota_quantum, "quota_quantum=%d"}, + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, {Opt_error, NULL} }; @@ -253,6 +257,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) } args->ar_errors = GFS2_ERRORS_PANIC; break; + case Opt_barrier: + args->ar_nobarrier = 0; + break; + case Opt_nobarrier: + args->ar_nobarrier = 1; + break; case Opt_error: default: printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); @@ -1143,6 +1153,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sb->s_flags |= MS_POSIXACL; else sb->s_flags &= ~MS_POSIXACL; + if (sdp->sd_args.ar_nobarrier) + set_bit(SDF_NOBARRIERS, &sdp->sd_flags); + else + clear_bit(SDF_NOBARRIERS, &sdp->sd_flags); spin_lock(>->gt_spin); gt->gt_log_flush_secs = args.ar_commit; gt->gt_quota_quantum = args.ar_quota_quantum; -- cgit v0.10.2 From cdcfde62dac64c86ff34e483c595d568a252c433 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 30 Oct 2009 10:48:53 +0000 Subject: GFS2: Display nobarrier option in /proc/mounts Since the default is barriers on, this only displays the nobarrier option when that is active. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 5e4b314..c282ad4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1336,6 +1336,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } seq_printf(s, ",errors=%s", state); } + if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) + seq_printf(s, ",nobarrier"); + return 0; } -- cgit v0.10.2 From 1579343a73e32b5886e186e8f3e4db85e420ed3f Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 11:06:37 +0000 Subject: GFS2: Remove dirent_first() function This function only had one caller left, and that caller only called it for leaf blocks, hence one branch of the "if" was never taken. In addition the call to get_left had already verified the metadata type, so the function can be reduced to a single line of code in its caller. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 297d7e5..25fddc1 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -525,38 +525,6 @@ consist_inode: return ERR_PTR(-EIO); } - -/** - * dirent_first - Return the first dirent - * @dip: the directory - * @bh: The buffer - * @dent: Pointer to list of dirents - * - * return first dirent whether bh points to leaf or stuffed dinode - * - * Returns: IS_LEAF, IS_DINODE, or -errno - */ - -static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh, - struct gfs2_dirent **dent) -{ - struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data; - - if (be32_to_cpu(h->mh_type) == GFS2_METATYPE_LF) { - if (gfs2_meta_check(GFS2_SB(&dip->i_inode), bh)) - return -EIO; - *dent = (struct gfs2_dirent *)(bh->b_data + - sizeof(struct gfs2_leaf)); - return IS_LEAF; - } else { - if (gfs2_metatype_check(GFS2_SB(&dip->i_inode), bh, GFS2_METATYPE_DI)) - return -EIO; - *dent = (struct gfs2_dirent *)(bh->b_data + - sizeof(struct gfs2_dinode)); - return IS_DINODE; - } -} - static int dirent_check_reclen(struct gfs2_inode *dip, const struct gfs2_dirent *d, const void *end_p) { @@ -1006,7 +974,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) divider = (start + half_len) << (32 - dip->i_depth); /* Copy the entries */ - dirent_first(dip, obh, &dent); + dent = (struct gfs2_dirent *)(obh->b_data + sizeof(struct gfs2_leaf)); do { next = dent; -- cgit v0.10.2 From 2c77634965ee28c8b4790ffb5e83dd5ff7ac8988 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 11:10:51 +0000 Subject: GFS2: Locking order fix in gfs2_check_blk_state In some cases we already have the rindex lock when we enter this function. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8f1cfb0..0608f49 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -1710,11 +1710,16 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) { struct gfs2_rgrpd *rgd; struct gfs2_holder ri_gh, rgd_gh; + struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); + int ri_locked = 0; int error; - error = gfs2_rindex_hold(sdp, &ri_gh); - if (error) - goto fail; + if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto fail; + ri_locked = 1; + } error = -EINVAL; rgd = gfs2_blk2rgrpd(sdp, no_addr); @@ -1730,7 +1735,8 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type) gfs2_glock_dq_uninit(&rgd_gh); fail_rindex: - gfs2_glock_dq_uninit(&ri_gh); + if (ri_locked) + gfs2_glock_dq_uninit(&ri_gh); fail: return error; } -- cgit v0.10.2 From 0ab7d13fcbd7ce1658c563e345990ba453719deb Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 6 Nov 2009 16:20:51 +0000 Subject: GFS2: Tag all metadata with jid There are two spare field in the header common to all GFS2 metadata. One is just the right size to fit a journal id in it, and this patch updates the journal code so that each time a metadata block is modified, we tag it with the journal id of the node which is performing the modification. The reason for this is that it should make it much easier to debug issues which arise if we can tell which node was the last to modify a particular metadata block. Since the field is updated before the block is written into the journal, each journal should only contain metadata which is tagged with its own journal id. The one exception to this is the journal header block, which might have a different node's id in it, if that journal was recovered by another node in the cluster. Thus each journal will contain a record of which nodes recovered it, via the journal header. The other field in the metadata header could potentially be used to hold information about what kind of operation was performed, but for the time being we just zero it on each transaction so that if we use it for that in future, we'll know that the information (where it exists) is reliable. I did consider using the other field to hold the journal sequence number, however since in GFS2's journaling we write the modified data into the journal and not the original data, this gives no information as to what action caused the modification, so I think we can probably come up with a better use for those 64 bits in the future. Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6380cd9..26ba2a4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -947,9 +947,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC); str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI); - str->di_header.__pad0 = 0; str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI); - str->di_header.__pad1 = 0; str->di_num.no_addr = cpu_to_be64(ip->i_no_addr); str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino); str->di_mode = cpu_to_be32(ip->i_inode.i_mode); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 13c6237..4511b08 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -596,7 +596,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++); lh->lh_flags = cpu_to_be32(flags); lh->lh_tail = cpu_to_be32(tail); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 9969ff0..de97632 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -132,6 +132,7 @@ static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_meta_header *mh; struct gfs2_trans *tr; lock_buffer(bd->bd_bh); @@ -148,6 +149,9 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); + mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; + mh->__pad0 = cpu_to_be64(0); + mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); tr->tr_num_buf_new++; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 09fa319..4b9bece 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -410,7 +410,9 @@ static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *hea memset(lh, 0, sizeof(struct gfs2_log_header)); lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH); + lh->lh_header.__pad0 = cpu_to_be64(0); lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH); + lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1); lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT); lh->lh_blkno = cpu_to_be32(lblock); diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index b80c88d..81f90a5 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -81,7 +81,11 @@ struct gfs2_meta_header { __be32 mh_type; __be64 __pad0; /* Was generation number in gfs1 */ __be32 mh_format; - __be32 __pad1; /* Was incarnation number in gfs1 */ + /* This union is to keep userspace happy */ + union { + __be32 mh_jid; /* Was incarnation number in gfs1 */ + __be32 __pad1; + }; }; /* -- cgit v0.10.2 From 9ae3c6de6981a1e8765b5d029f94555fc0f0fea0 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 10 Nov 2009 12:54:56 -0600 Subject: GFS2: drop rindex glock to refresh rindex list When a gfs2 filesystem is grown, it needs to rebuild the rindex list to be able to use the new space. gfs2 does this when the rindex is marked not uptodate, which happens when the rindex glock is dropped. However, on a single node setup, there is never any reason to drop the rindex glock, so gfs2 never invalidates the the rindex. This patch makes gfs2 automatically drop the rindex glock after filesystem grows, so it can refresh the rindex list. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 694b5d4..dce062a 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -819,8 +819,10 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, mark_inode_dirty(inode); } - if (inode == sdp->sd_rindex) + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); + ip->i_gh.gh_flags |= GL_NOCACHE; + } brelse(dibh); gfs2_trans_end(sdp); @@ -889,8 +891,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, mark_inode_dirty(inode); } - if (inode == sdp->sd_rindex) + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); + ip->i_gh.gh_flags |= GL_NOCACHE; + } brelse(dibh); gfs2_trans_end(sdp); -- cgit v0.10.2 From c29cd9004e72acb5a6cb8caf08508f1c5edee686 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 18 Nov 2009 18:09:41 +0800 Subject: writeback: remove unused nonblocking and congestion checks (gfs2) No one is calling wb_writeback and write_cache_pages with wbc.nonblocking=1 any more. And lumpy pageout will want to do nonblocking writeback without the congestion wait. Signed-off-by: Wu Fengguang Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index dce062a..7b8da94 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -269,7 +269,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; unsigned offset = i_size & (PAGE_CACHE_SIZE-1); unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); - struct backing_dev_info *bdi = mapping->backing_dev_info; int i; int ret; @@ -313,11 +312,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, if (ret || (--(wbc->nr_to_write) <= 0)) ret = 1; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - ret = 1; - } - } gfs2_trans_end(sdp); return ret; @@ -338,7 +332,6 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, static int gfs2_write_cache_jdata(struct address_space *mapping, struct writeback_control *wbc) { - struct backing_dev_info *bdi = mapping->backing_dev_info; int ret = 0; int done = 0; struct pagevec pvec; @@ -348,11 +341,6 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, int scanned = 0; int range_whole = 0; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ -- cgit v0.10.2 From 26bb7505cf7db3560286be9f6384b6d3911f78b5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Fri, 27 Nov 2009 10:31:11 +0000 Subject: GFS2: Fix glock refcount issues This patch fixes some ref counting issues. Firstly by moving the point at which we drop the ref count after a dlm lock operation has completed we ensure that we never call gfs2_glock_hold() on a lock with a zero ref count. Secondly, by using atomic_dec_and_lock() in gfs2_glock_put() we ensure that at no time will a glock with zero ref count appear on the lru_list. That means that we can remove the check for this in our shrinker (which was racy). Signed-off-by: Steven Whitehouse diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a3f90ad..f455a03 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -241,15 +241,14 @@ int gfs2_glock_put(struct gfs2_glock *gl) int rv = 0; write_lock(gl_lock_addr(gl->gl_hash)); - if (atomic_dec_and_test(&gl->gl_ref)) { + if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { hlist_del(&gl->gl_list); - write_unlock(gl_lock_addr(gl->gl_hash)); - spin_lock(&lru_lock); if (!list_empty(&gl->gl_lru)) { list_del_init(&gl->gl_lru); atomic_dec(&lru_count); } spin_unlock(&lru_lock); + write_unlock(gl_lock_addr(gl->gl_hash)); GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); glock_free(gl); rv = 1; @@ -513,7 +512,6 @@ retry: GLOCK_BUG_ON(gl, 1); } spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); return; } @@ -524,8 +522,6 @@ retry: if (glops->go_xmote_bh) { spin_unlock(&gl->gl_spin); rv = glops->go_xmote_bh(gl, gh); - if (rv == -EAGAIN) - return; spin_lock(&gl->gl_spin); if (rv) { do_error(gl, rv); @@ -540,7 +536,6 @@ out: clear_bit(GLF_LOCK, &gl->gl_flags); out_locked: spin_unlock(&gl->gl_spin); - gfs2_glock_put(gl); } static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock, @@ -600,7 +595,6 @@ __acquires(&gl->gl_spin) if (!(ret & LM_OUT_ASYNC)) { finish_xmote(gl, ret); - gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } else { @@ -712,9 +706,12 @@ static void glock_work_func(struct work_struct *work) { unsigned long delay = 0; struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + int drop_ref = 0; - if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) + if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { finish_xmote(gl, gl->gl_reply); + drop_ref = 1; + } down_read(&gfs2_umount_flush_sem); spin_lock(&gl->gl_spin); if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && @@ -732,6 +729,8 @@ static void glock_work_func(struct work_struct *work) if (!delay || queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); + if (drop_ref) + gfs2_glock_put(gl); } /** @@ -1366,10 +1365,6 @@ static int gfs2_shrink_glock_memory(int nr, gfp_t gfp_mask) list_del_init(&gl->gl_lru); atomic_dec(&lru_count); - /* Check if glock is about to be freed */ - if (atomic_read(&gl->gl_ref) == 0) - continue; - /* Test for being demotable */ if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { gfs2_glock_hold(gl); -- cgit v0.10.2 From 7d1849aff6687a135a8da3a75e32a00e3137a5e2 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Thu, 3 Dec 2009 15:52:44 +0100 Subject: x86, apic: Enable lapic nmi watchdog on AMD Family 11h The x86 lapic nmi watchdog does not recognize AMD Family 11h, resulting in: NMI watchdog: CPU not supported As far as I can see from available documentation (the BKDM), family 11h looks identical to family 10h as far as the PMU is concerned. Extending the check to accept family 11h results in: Testing NMI watchdog ... OK. I've been running with this change on a Turion X2 Ultra ZM-82 laptop for a couple of weeks now without problems. Signed-off-by: Mikael Pettersson Cc: Andreas Herrmann Cc: Joerg Roedel Cc: LKML-Reference: <19223.53436.931768.278021@pilspetsen.it.uu.se> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f..898df97 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -712,7 +712,7 @@ static void probe_nmi_watchdog(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16) + boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) return; wd_ops = &k7_wd_ops; break; -- cgit v0.10.2 From 38938c879eb0c39edf85d5164aa0cffe2874304c Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:50 -0800 Subject: Add support for GCC-4.5's __builtin_unreachable() to compiler.h (v2) Starting with version 4.5, GCC has a new built-in function __builtin_unreachable() that can be used in places like the kernel's BUG() where inline assembly is used to transfer control flow. This eliminated the need for an endless loop in these places. The patch adds a new macro 'unreachable()' that will expand to either __builtin_unreachable() or an endless loop depending on the compiler version. Change from v1: Simplify unreachable() for non-GCC 4.5 case. Signed-off-by: David Daney Acked-by: Ralf Baechle Signed-off-by: Linus Torvalds diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 450fa59..ab3af40 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -36,4 +36,18 @@ the kernel context */ #define __cold __attribute__((__cold__)) + +#if __GNUC_MINOR__ >= 5 +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + * + * Early snapshots of gcc 4.5 don't support this and we can't detect + * this in the preprocessor, but we can live with this because they're + * unreleased. Really, we need to have autoconf for the kernel. + */ +#define unreachable() __builtin_unreachable() +#endif + #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 04fb513..59f2089 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -144,6 +144,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define barrier() __memory_barrier() #endif +/* Unreachable code */ +#ifndef unreachable +# define unreachable() do { } while (1) +#endif + #ifndef RELOC_HIDE # define RELOC_HIDE(ptr, off) \ ({ unsigned long __ptr; \ -- cgit v0.10.2 From a5fc5eba4dfcc284e6adcd7fdcd5b43182230d2b Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:51 -0800 Subject: x86: Convert BUG() to use unreachable() Use the new unreachable() macro instead of for(;;);. When allyesconfig is built with a GCC-4.5 snapshot on i686 the size of the text segment is reduced by 3987 bytes (from 6827019 to 6823032). Signed-off-by: David Daney Acked-by: "H. Peter Anvin" CC: Thomas Gleixner CC: Ingo Molnar CC: x86@kernel.org Signed-off-by: Linus Torvalds diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index d9cf1cd..f654d1b 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -22,14 +22,14 @@ do { \ ".popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (sizeof(struct bug_entry))); \ - for (;;) ; \ + unreachable(); \ } while (0) #else #define BUG() \ do { \ asm volatile("ud2"); \ - for (;;) ; \ + unreachable(); \ } while (0) #endif -- cgit v0.10.2 From 4ef5651e85589e3aaca704c6d2d7ae7e794e5d25 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:52 -0800 Subject: MIPS: Convert BUG() to use unreachable() Use the new unreachable() macro instead of while(1); Signed-off-by: David Daney Acked-by: Ralf Baechle CC: linux-mips@linux-mips.org Signed-off-by: Linus Torvalds diff --git a/arch/mips/include/asm/bug.h b/arch/mips/include/asm/bug.h index 6cf29c2..540c98a 100644 --- a/arch/mips/include/asm/bug.h +++ b/arch/mips/include/asm/bug.h @@ -11,9 +11,7 @@ static inline void __noreturn BUG(void) { __asm__ __volatile__("break %0" : : "i" (BRK_BUG)); - /* Fool GCC into thinking the function doesn't return. */ - while (1) - ; + unreachable(); } #define HAVE_ARCH_BUG -- cgit v0.10.2 From 5506e68975c346e55f786b554e28e368cdede444 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:53 -0800 Subject: s390: Convert BUG() to use unreachable() Use the new unreachable() macro instead of for(;;); Signed-off-by: David Daney Acked-by: Martin Schwidefsky CC: Heiko Carstens CC: linux390@de.ibm.com CC: linux-s390@vger.kernel.org Signed-off-by: Linus Torvalds diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h index 7efd0ab..efb74fd 100644 --- a/arch/s390/include/asm/bug.h +++ b/arch/s390/include/asm/bug.h @@ -49,7 +49,7 @@ #define BUG() do { \ __EMIT_BUG(0); \ - for (;;); \ + unreachable(); \ } while (0) #define WARN_ON(x) ({ \ -- cgit v0.10.2 From 27d16d08717faeaa8afd1b736a096dbaab90f08e Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 4 Dec 2009 17:44:54 -0800 Subject: avr32: Convert BUG() to use unreachable() Use the new unreachable() macro instead of for(;;); Signed-off-by: David Daney Acked-by: Haavard Skinnemoen Signed-off-by: Linus Torvalds diff --git a/arch/avr32/include/asm/bug.h b/arch/avr32/include/asm/bug.h index 331d45b..2aa373cc 100644 --- a/arch/avr32/include/asm/bug.h +++ b/arch/avr32/include/asm/bug.h @@ -52,7 +52,7 @@ #define BUG() \ do { \ _BUG_OR_WARN(0); \ - for (;;); \ + unreachable(); \ } while (0) #define WARN_ON(condition) \ -- cgit v0.10.2 From 7a5b4e16c880f8350d255dc188f81622905618c1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Oct 2009 14:55:53 +0100 Subject: ARM: sa11x0: convert set_xxx_data() to register_xxx() Only register devices if we have platform data for those which require platform data. Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 55e6447..169e5b8 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -249,10 +249,10 @@ static void __init assabet_init(void) #endif } - sa11x0_set_flash_data(&assabet_flash_data, assabet_flash_resources, - ARRAY_SIZE(assabet_flash_resources)); - sa11x0_set_irda_data(&assabet_irda_data); - sa11x0_set_mcp_data(&assabet_mcp_data); + sa11x0_register_mtd(&assabet_flash_data, assabet_flash_resources, + ARRAY_SIZE(assabet_flash_resources)); + sa11x0_register_irda(&assabet_irda_data); + sa11x0_register_mcp(&assabet_mcp_data); } /* diff --git a/arch/arm/mach-sa1100/badge4.c b/arch/arm/mach-sa1100/badge4.c index ab5883b..051ec0f 100644 --- a/arch/arm/mach-sa1100/badge4.c +++ b/arch/arm/mach-sa1100/badge4.c @@ -212,7 +212,7 @@ static int __init badge4_init(void) /* maybe turn on 5v0 from the start */ badge4_set_5V(BADGE4_5V_INITIALLY, five_v_on); - sa11x0_set_flash_data(&badge4_flash_data, &badge4_flash_resource, 1); + sa11x0_register_mtd(&badge4_flash_data, &badge4_flash_resource, 1); return 0; } diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c index fd3ad9c..bc950ef 100644 --- a/arch/arm/mach-sa1100/cerf.c +++ b/arch/arm/mach-sa1100/cerf.c @@ -129,8 +129,8 @@ static struct mcp_plat_data cerf_mcp_data = { static void __init cerf_init(void) { platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices)); - sa11x0_set_flash_data(&cerf_flash_data, &cerf_flash_resource, 1); - sa11x0_set_mcp_data(&cerf_mcp_data); + sa11x0_register_mtd(&cerf_flash_data, &cerf_flash_resource, 1); + sa11x0_register_mcp(&cerf_mcp_data); } MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube") diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index bbf2ebc..be7e5a3 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -272,9 +272,9 @@ static void __init collie_init(void) printk(KERN_WARNING "collie: Unable to register LoCoMo device\n"); } - sa11x0_set_flash_data(&collie_flash_data, collie_flash_resources, - ARRAY_SIZE(collie_flash_resources)); - sa11x0_set_mcp_data(&collie_mcp_data); + sa11x0_register_mtd(&collie_flash_data, collie_flash_resources, + ARRAY_SIZE(collie_flash_resources)); + sa11x0_register_mcp(&collie_mcp_data); sharpsl_save_param(); } diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 23cfdd5..9faea15 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -162,6 +162,17 @@ static void sa1100_power_off(void) PMCR = PMCR_SF; } +static void sa11x0_register_device(struct platform_device *dev, void *data) +{ + int err; + dev->dev.platform_data = data; + err = platform_device_register(dev); + if (err) + printk(KERN_ERR "Unable to register device %s: %d\n", + dev->name, err); +} + + static struct resource sa11x0udc_resources[] = { [0] = { .start = 0x80000000, @@ -234,9 +245,9 @@ static struct platform_device sa11x0mcp_device = { .resource = sa11x0mcp_resources, }; -void sa11x0_set_mcp_data(struct mcp_plat_data *data) +void sa11x0_register_mcp(struct mcp_plat_data *data) { - sa11x0mcp_device.dev.platform_data = data; + sa11x0_register_device(&sa11x0mcp_device, data); } static struct resource sa11x0ssp_resources[] = { @@ -293,13 +304,13 @@ static struct platform_device sa11x0mtd_device = { .id = -1, }; -void sa11x0_set_flash_data(struct flash_platform_data *flash, - struct resource *res, int nr) +void sa11x0_register_mtd(struct flash_platform_data *flash, + struct resource *res, int nr) { flash->name = "sa1100"; - sa11x0mtd_device.dev.platform_data = flash; sa11x0mtd_device.resource = res; sa11x0mtd_device.num_resources = nr; + sa11x0_register_device(&sa11x0mtd_device, flash); } static struct resource sa11x0ir_resources[] = { @@ -329,9 +340,9 @@ static struct platform_device sa11x0ir_device = { .resource = sa11x0ir_resources, }; -void sa11x0_set_irda_data(struct irda_platform_data *irda) +void sa11x0_register_irda(struct irda_platform_data *irda) { - sa11x0ir_device.dev.platform_data = irda; + sa11x0_register_device(&sa11x0ir_device, irda); } static struct platform_device sa11x0rtc_device = { @@ -343,21 +354,15 @@ static struct platform_device *sa11x0_devices[] __initdata = { &sa11x0udc_device, &sa11x0uart1_device, &sa11x0uart3_device, - &sa11x0mcp_device, &sa11x0ssp_device, &sa11x0pcmcia_device, &sa11x0fb_device, - &sa11x0mtd_device, &sa11x0rtc_device, }; static int __init sa1100_init(void) { pm_power_off = sa1100_power_off; - - if (sa11x0ir_device.dev.platform_data) - platform_device_register(&sa11x0ir_device); - return platform_add_devices(sa11x0_devices, ARRAY_SIZE(sa11x0_devices)); } diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 793c2e6..ec03f18 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -32,14 +32,11 @@ extern unsigned int sa11x0_ppcr_to_freq(unsigned int idx); struct flash_platform_data; struct resource; -extern void sa11x0_set_flash_data(struct flash_platform_data *flash, - struct resource *res, int nr); - -struct sa11x0_ssp_plat_ops; -extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops); +void sa11x0_register_mtd(struct flash_platform_data *flash, + struct resource *res, int nr); struct irda_platform_data; -void sa11x0_set_irda_data(struct irda_platform_data *irda); +void sa11x0_register_irda(struct irda_platform_data *irda); struct mcp_plat_data; -void sa11x0_set_mcp_data(struct mcp_plat_data *data); +void sa11x0_register_mcp(struct mcp_plat_data *data); diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 0eb2f15..0b4b870 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -102,8 +102,8 @@ static struct irda_platform_data h3600_irda_data = { static void h3xxx_mach_init(void) { - sa11x0_set_flash_data(&h3xxx_flash_data, &h3xxx_flash_resource, 1); - sa11x0_set_irda_data(&h3600_irda_data); + sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); + sa11x0_register_irda(&h3600_irda_data); } /* diff --git a/arch/arm/mach-sa1100/hackkit.c b/arch/arm/mach-sa1100/hackkit.c index e7056c0..51568df 100644 --- a/arch/arm/mach-sa1100/hackkit.c +++ b/arch/arm/mach-sa1100/hackkit.c @@ -187,7 +187,7 @@ static struct resource hackkit_flash_resource = { static void __init hackkit_init(void) { - sa11x0_set_flash_data(&hackkit_flash_data, &hackkit_flash_resource, 1); + sa11x0_register_mtd(&hackkit_flash_data, &hackkit_flash_resource, 1); } /********************************************************************** diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c index fd776bb..13ebd2d 100644 --- a/arch/arm/mach-sa1100/jornada720.c +++ b/arch/arm/mach-sa1100/jornada720.c @@ -354,7 +354,7 @@ static struct resource jornada720_flash_resource = { static void __init jornada720_mach_init(void) { - sa11x0_set_flash_data(&jornada720_flash_data, &jornada720_flash_resource, 1); + sa11x0_register_mtd(&jornada720_flash_data, &jornada720_flash_resource, 1); } MACHINE_START(JORNADA720, "HP Jornada 720") diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c index 1f940df..68069d6 100644 --- a/arch/arm/mach-sa1100/lart.c +++ b/arch/arm/mach-sa1100/lart.c @@ -28,7 +28,7 @@ static struct mcp_plat_data lart_mcp_data = { static void __init lart_init(void) { - sa11x0_set_mcp_data(&lart_mcp_data); + sa11x0_register_mcp(&lart_mcp_data); } static struct map_desc lart_io_desc[] __initdata = { diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c index e1458bc..1ccd601 100644 --- a/arch/arm/mach-sa1100/pleb.c +++ b/arch/arm/mach-sa1100/pleb.c @@ -109,7 +109,7 @@ static struct flash_platform_data pleb_flash_data = { static void __init pleb_init(void) { - sa11x0_set_flash_data(&pleb_flash_data, pleb_flash_resources, + sa11x0_register_mtd(&pleb_flash_data, pleb_flash_resources, ARRAY_SIZE(pleb_flash_resources)); diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c index ddd917d..85e82bb 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -59,8 +59,8 @@ static struct mcp_plat_data shannon_mcp_data = { static void __init shannon_init(void) { - sa11x0_set_flash_data(&shannon_flash_data, &shannon_flash_resource, 1); - sa11x0_set_mcp_data(&shannon_mcp_data); + sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1); + sa11x0_register_mcp(&shannon_mcp_data); } static void __init shannon_map_io(void) diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index 3c74534..49cfd64 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -166,9 +166,9 @@ static void __init simpad_map_io(void) PCFR = 0; PSDR = 0; - sa11x0_set_flash_data(&simpad_flash_data, simpad_flash_resources, + sa11x0_register_mtd(&simpad_flash_data, simpad_flash_resources, ARRAY_SIZE(simpad_flash_resources)); - sa11x0_set_mcp_data(&simpad_mcp_data); + sa11x0_register_mcp(&simpad_mcp_data); } static void simpad_power_off(void) -- cgit v0.10.2 From 898e810eadb16670c800b3279800bda3a822c6d7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Oct 2009 14:19:44 +0100 Subject: ARM: h3600: provide each iPAQ machine type with own init function Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 0b4b870..a91abd3 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -321,6 +321,11 @@ static void __init h3100_map_io(void) assign_h3600_egpio = h3100_control_egpio; } +static void h3100_mach_init(void) +{ + h3xxx_mach_init(); +} + MACHINE_START(H3100, "Compaq iPAQ H3100") .phys_io = 0x80000000, .io_pg_offst = ((0xf8000000) >> 18) & 0xfffc, @@ -328,7 +333,7 @@ MACHINE_START(H3100, "Compaq iPAQ H3100") .map_io = h3100_map_io, .init_irq = sa1100_init_irq, .timer = &sa1100_timer, - .init_machine = h3xxx_mach_init, + .init_machine = h3100_mach_init, MACHINE_END #endif /* CONFIG_SA1100_H3100 */ @@ -418,6 +423,11 @@ static void __init h3600_map_io(void) assign_h3600_egpio = h3600_control_egpio; } +static void h3600_mach_init(void) +{ + h3xxx_mach_init(); +} + MACHINE_START(H3600, "Compaq iPAQ H3600") .phys_io = 0x80000000, .io_pg_offst = ((0xf8000000) >> 18) & 0xfffc, @@ -425,7 +435,7 @@ MACHINE_START(H3600, "Compaq iPAQ H3600") .map_io = h3600_map_io, .init_irq = sa1100_init_irq, .timer = &sa1100_timer, - .init_machine = h3xxx_mach_init, + .init_machine = h3600_mach_init, MACHINE_END #endif /* CONFIG_SA1100_H3600 */ -- cgit v0.10.2 From a5d176a19138eef45e4c7c80a8d3a7c14c8aec53 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Oct 2009 14:22:23 +0100 Subject: ARM: iPAQ: separate IrDA machine specifics Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index a91abd3..c51432b 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -80,30 +80,9 @@ static struct resource h3xxx_flash_resource = { .flags = IORESOURCE_MEM, }; -/* - * This turns the IRDA power on or off on the Compaq H3600 - */ -static int h3600_irda_set_power(struct device *dev, unsigned int state) -{ - assign_h3600_egpio( IPAQ_EGPIO_IR_ON, state ); - - return 0; -} - -static void h3600_irda_set_speed(struct device *dev, unsigned int speed) -{ - assign_h3600_egpio(IPAQ_EGPIO_IR_FSEL, !(speed < 4000000)); -} - -static struct irda_platform_data h3600_irda_data = { - .set_power = h3600_irda_set_power, - .set_speed = h3600_irda_set_speed, -}; - static void h3xxx_mach_init(void) { sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); - sa11x0_register_irda(&h3600_irda_data); } /* @@ -321,9 +300,30 @@ static void __init h3100_map_io(void) assign_h3600_egpio = h3100_control_egpio; } +/* + * This turns the IRDA power on or off on the Compaq H3100 + */ +static int h3100_irda_set_power(struct device *dev, unsigned int state) +{ + assign_h3100_egpio(IPAQ_EGPIO_IR_ON, state); + + return 0; +} + +static void h3100_irda_set_speed(struct device *dev, unsigned int speed) +{ + assign_h3100_egpio(IPAQ_EGPIO_IR_FSEL, !(speed < 4000000)); +} + +static struct irda_platform_data h3100_irda_data = { + .set_power = h3100_irda_set_power, + .set_speed = h3100_irda_set_speed, +}; + static void h3100_mach_init(void) { h3xxx_mach_init(); + sa11x0_register_irda(&h3100_irda_data); } MACHINE_START(H3100, "Compaq iPAQ H3100") @@ -423,9 +423,29 @@ static void __init h3600_map_io(void) assign_h3600_egpio = h3600_control_egpio; } +/* + * This turns the IRDA power on or off on the Compaq H3600 + */ +static int h3600_irda_set_power(struct device *dev, unsigned int state) +{ + assign_h3600_egpio(IPAQ_EGPIO_IR_ON, state); + return 0; +} + +static void h3600_irda_set_speed(struct device *dev, unsigned int speed) +{ + assign_h3600_egpio(IPAQ_EGPIO_IR_FSEL, !(speed < 4000000)); +} + +static struct irda_platform_data h3600_irda_data = { + .set_power = h3600_irda_set_power, + .set_speed = h3600_irda_set_speed, +}; + static void h3600_mach_init(void) { h3xxx_mach_init(); + sa11x0_register_irda(&h3600_irda_data); } MACHINE_START(H3600, "Compaq iPAQ H3600") -- cgit v0.10.2 From 0831e3e4cf8abcbb772c0cb1eb4406ffcdb974df Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Oct 2009 14:35:16 +0100 Subject: ARM: iPAQ: provide a way to setup platform-controlled GPIOs Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index c51432b..2b545a4 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,47 @@ void (*assign_h3600_egpio)(enum ipaq_egpio_type x, int level); EXPORT_SYMBOL(assign_h3600_egpio); +struct gpio_default_state { + int gpio; + int mode; + const char *name; +}; + +#define GPIO_MODE_IN -1 +#define GPIO_MODE_OUT0 0 +#define GPIO_MODE_OUT1 1 + +static void h3xxx_init_gpio(struct gpio_default_state *s, size_t n) +{ + while (n--) { + const char *name = s->name; + int err; + + if (!name) + name = "[init]"; + err = gpio_request(s->gpio, name); + if (err) { + printk(KERN_ERR "gpio%u: unable to request: %d\n", + s->gpio, err); + continue; + } + if (s->mode >= 0) { + err = gpio_direction_output(s->gpio, s->mode); + } else { + err = gpio_direction_input(s->gpio); + } + if (err) { + printk(KERN_ERR "gpio%u: unable to set direction: %d\n", + s->gpio, err); + continue; + } + if (!s->name) + gpio_free(s->gpio); + s++; + } +} + + static struct mtd_partition h3xxx_partitions[] = { { .name = "H3XXX boot firmware", -- cgit v0.10.2 From 9c196f0f8db928ef9a2a935d86d75a218a7131b1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Oct 2009 14:36:05 +0100 Subject: ARM: iPAQ: convert H3100 IrDA to use generic gpio support Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 2b545a4..c53ad65 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -347,14 +347,13 @@ static void __init h3100_map_io(void) */ static int h3100_irda_set_power(struct device *dev, unsigned int state) { - assign_h3100_egpio(IPAQ_EGPIO_IR_ON, state); - + gpio_set_value(H3100_GPIO_IR_ON, state); return 0; } static void h3100_irda_set_speed(struct device *dev, unsigned int speed) { - assign_h3100_egpio(IPAQ_EGPIO_IR_FSEL, !(speed < 4000000)); + gpio_set_value(H3100_GPIO_IR_FSEL, !(speed < 4000000)); } static struct irda_platform_data h3100_irda_data = { @@ -362,8 +361,14 @@ static struct irda_platform_data h3100_irda_data = { .set_speed = h3100_irda_set_speed, }; +static struct gpio_default_state h3100_default_gpio[] = { + { H3100_GPIO_IR_ON, GPIO_MODE_OUT0, "IrDA power" }, + { H3100_GPIO_IR_FSEL, GPIO_MODE_OUT0, "IrDA fsel" }, +}; + static void h3100_mach_init(void) { + h3xxx_init_gpio(h3100_default_gpio, ARRAY_SIZE(h3100_default_gpio)); h3xxx_mach_init(); sa11x0_register_irda(&h3100_irda_data); } diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h index a36ca76..d4d0b22 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h @@ -39,6 +39,15 @@ #define GPIO_H3100_IR_ON GPIO_GPIO (8) #define GPIO_H3100_IR_FSEL GPIO_GPIO (9) +/* gpiolib versions of the above */ +#define H3100_GPIO_BT_ON 2 +#define H3100_GPIO_QMUTE 4 +#define H3100_GPIO_LCD_3V_ON 5 +#define H3100_GPIO_AUD_ON 6 +#define H3100_GPIO_AUD_PWR_ON 7 +#define H3100_GPIO_IR_ON 8 +#define H3100_GPIO_IR_FSEL 9 + /* for H3600, audio sample rate clock generator */ #define GPIO_H3600_CLK_SET0 GPIO_GPIO (12) #define GPIO_H3600_CLK_SET1 GPIO_GPIO (13) -- cgit v0.10.2 From 6e21ee6aa76df6297e6dcea22d6583cafdb669f7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Oct 2009 15:16:27 +0100 Subject: ARM: iPAQ: move serial port support functions No point calling sa1100_register_uart_fns early - these aren't used until late in the boot sequence. Also convert to gpiolib support. Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index c53ad65..8a6a0e7 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -91,6 +91,9 @@ static void h3xxx_init_gpio(struct gpio_default_state *s, size_t n) } +/* + * H3xxx flash support + */ static struct mtd_partition h3xxx_partitions[] = { { .name = "H3XXX boot firmware", @@ -122,42 +125,35 @@ static struct resource h3xxx_flash_resource = { .flags = IORESOURCE_MEM, }; -static void h3xxx_mach_init(void) -{ - sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); -} /* - * low-level UART features + * H3xxx uart support */ - -static void h3600_uart_set_mctrl(struct uart_port *port, u_int mctrl) +static void h3xxx_uart_set_mctrl(struct uart_port *port, u_int mctrl) { if (port->mapbase == _Ser3UTCR0) { - if (mctrl & TIOCM_RTS) - GPCR = GPIO_H3600_COM_RTS; - else - GPSR = GPIO_H3600_COM_RTS; + gpio_set_value(H3XXX_GPIO_COM_RTS, !(mctrl & TIOCM_RTS)); } } -static u_int h3600_uart_get_mctrl(struct uart_port *port) +static u_int h3xxx_uart_get_mctrl(struct uart_port *port) { u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR; if (port->mapbase == _Ser3UTCR0) { - int gplr = GPLR; - /* DCD and CTS bits are inverted in GPLR by RS232 transceiver */ - if (gplr & GPIO_H3600_COM_DCD) + /* + * DCD and CTS bits are inverted in GPLR by RS232 transceiver + */ + if (gpio_get_value(H3XXX_GPIO_COM_DCD)) ret &= ~TIOCM_CD; - if (gplr & GPIO_H3600_COM_CTS) + if (gpio_get_value(H3XXX_GPIO_COM_CTS)) ret &= ~TIOCM_CTS; } return ret; } -static void h3600_uart_pm(struct uart_port *port, u_int state, u_int oldstate) +static void h3xxx_uart_pm(struct uart_port *port, u_int state, u_int oldstate) { if (port->mapbase == _Ser2UTCR0) { /* TODO: REMOVE THIS */ assign_h3600_egpio(IPAQ_EGPIO_IR_ON, !state); @@ -170,7 +166,7 @@ static void h3600_uart_pm(struct uart_port *port, u_int state, u_int oldstate) * Enable/Disable wake up events for this serial port. * Obviously, we only support this on the normal COM port. */ -static int h3600_uart_set_wake(struct uart_port *port, u_int enable) +static int h3xxx_uart_set_wake(struct uart_port *port, u_int enable) { int err = -EINVAL; @@ -184,13 +180,20 @@ static int h3600_uart_set_wake(struct uart_port *port, u_int enable) return err; } -static struct sa1100_port_fns h3600_port_fns __initdata = { - .set_mctrl = h3600_uart_set_mctrl, - .get_mctrl = h3600_uart_get_mctrl, - .pm = h3600_uart_pm, - .set_wake = h3600_uart_set_wake, +static struct sa1100_port_fns h3xxx_port_fns __initdata = { + .set_mctrl = h3xxx_uart_set_mctrl, + .get_mctrl = h3xxx_uart_get_mctrl, + .pm = h3xxx_uart_pm, + .set_wake = h3xxx_uart_set_wake, }; + +static void h3xxx_mach_init(void) +{ + sa1100_register_uart_fns(&h3xxx_port_fns); + sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); +} + /* * helper for sa1100fb */ @@ -227,7 +230,6 @@ static void __init h3xxx_map_io(void) sa1100_map_io(); iotable_init(h3600_io_desc, ARRAY_SIZE(h3600_io_desc)); - sa1100_register_uart_fns(&h3600_port_fns); sa1100_register_uart(0, 3); /* Common serial port */ // sa1100_register_uart(1, 1); /* Microcontroller on 3100/3600 */ @@ -329,7 +331,7 @@ static void __init h3100_map_io(void) /* Initialize h3100-specific values here */ GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = GPIO_H3600_COM_RTS | GPIO_H3600_L3_CLOCK | + GPDR = GPIO_H3600_L3_CLOCK | GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0 | H3100_DIRECT_EGPIO; @@ -364,6 +366,9 @@ static struct irda_platform_data h3100_irda_data = { static struct gpio_default_state h3100_default_gpio[] = { { H3100_GPIO_IR_ON, GPIO_MODE_OUT0, "IrDA power" }, { H3100_GPIO_IR_FSEL, GPIO_MODE_OUT0, "IrDA fsel" }, + { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, + { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, + { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, }; static void h3100_mach_init(void) @@ -460,7 +465,7 @@ static void __init h3600_map_io(void) /* Initialize h3600-specific values here */ GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = GPIO_H3600_COM_RTS | GPIO_H3600_L3_CLOCK | + GPDR = GPIO_H3600_L3_CLOCK | GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0 | GPIO_LDD15 | GPIO_LDD14 | GPIO_LDD13 | GPIO_LDD12 | @@ -489,8 +494,15 @@ static struct irda_platform_data h3600_irda_data = { .set_speed = h3600_irda_set_speed, }; +static struct gpio_default_state h3600_default_gpio[] = { + { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, + { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, + { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, +}; + static void h3600_mach_init(void) { + h3xxx_init_gpio(h3600_default_gpio, ARRAY_SIZE(h3600_default_gpio)); h3xxx_mach_init(); sa11x0_register_irda(&h3600_irda_data); } diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h index d4d0b22..18de674 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h @@ -24,6 +24,20 @@ #define _INCLUDE_H3600_GPIO_H_ /* + * gpiolib numbers for all iPAQs + */ +#define H3XXX_GPIO_PWR_BUTTON 0 +#define H3XXX_GPIO_PCMCIA_CD1 10 +#define H3XXX_GPIO_PCMCIA_IRQ1 11 +#define H3XXX_GPIO_PCMCIA_CD0 17 +#define H3XXX_GPIO_SYS_CLK 19 +#define H3XXX_GPIO_PCMCIA_IRQ0 21 +#define H3XXX_GPIO_COM_DCD 23 +#define H3XXX_GPIO_OPTION 24 +#define H3XXX_GPIO_COM_CTS 25 +#define H3XXX_GPIO_COM_RTS 26 + +/* * GPIO lines that are common across ALL iPAQ models are in "h3600.h" * This file contains machine-specific definitions */ -- cgit v0.10.2 From 0fb85a5aa0958b2118eab8e035731051b05775bb Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 6 Oct 2009 16:40:24 +0100 Subject: ARM: iPAQ: no need to set PWER_RTC The rtc-sa1100 driver takes care of this. Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 8a6a0e7..2f6ea17 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -239,7 +239,7 @@ static void __init h3xxx_map_io(void) /* Configure suspend conditions */ PGSR = 0; - PWER = PWER_GPIO0 | PWER_RTC; + PWER = PWER_GPIO0; PCFR = PCFR_OPDE; PSDR = 0; -- cgit v0.10.2 From e55b20e81d5c40bd04f1747450ca622b105c97a3 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 11:06:46 +0100 Subject: ARM: 5795/1: SA1100: h3100/h3600: mark *_mach_init functions as __init Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 2f6ea17..ebb3b8d 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -188,7 +188,7 @@ static struct sa1100_port_fns h3xxx_port_fns __initdata = { }; -static void h3xxx_mach_init(void) +static void __init h3xxx_mach_init(void) { sa1100_register_uart_fns(&h3xxx_port_fns); sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); @@ -371,7 +371,7 @@ static struct gpio_default_state h3100_default_gpio[] = { { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, }; -static void h3100_mach_init(void) +static void __init h3100_mach_init(void) { h3xxx_init_gpio(h3100_default_gpio, ARRAY_SIZE(h3100_default_gpio)); h3xxx_mach_init(); @@ -500,7 +500,7 @@ static struct gpio_default_state h3600_default_gpio[] = { { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, }; -static void h3600_mach_init(void) +static void __init h3600_mach_init(void) { h3xxx_init_gpio(h3600_default_gpio, ARRAY_SIZE(h3600_default_gpio)); h3xxx_mach_init(); -- cgit v0.10.2 From 1e23221e98deff9ef9adad5f2f991723f080b1ee Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 11:10:57 +0100 Subject: ARM: 5797/1: SA1100: h3100/h3600: remove dead links from Kconfig help text Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 4e5c07f..854768fa 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -59,7 +59,6 @@ config SA1100_H3100 Linux port to this machine can be found at: - config SA1100_H3600 bool "Compaq iPAQ H3600/H3700" @@ -69,7 +68,6 @@ config SA1100_H3600 Linux port to this machine can be found at: - config SA1100_BADGE4 bool "HP Labs BadgePAD 4" -- cgit v0.10.2 From 2a151a0f12b136d28f0501fa294462f8e9e310a1 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 11:10:58 +0100 Subject: ARM: 5796/1: SA1100: h3600: remove IRDA bits from serial PM callback IRDA is handled by separate sa1100-ir driver and has nothing to do with sa1100_serial Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index ebb3b8d..5033c03 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -155,11 +155,8 @@ static u_int h3xxx_uart_get_mctrl(struct uart_port *port) static void h3xxx_uart_pm(struct uart_port *port, u_int state, u_int oldstate) { - if (port->mapbase == _Ser2UTCR0) { /* TODO: REMOVE THIS */ - assign_h3600_egpio(IPAQ_EGPIO_IR_ON, !state); - } else if (port->mapbase == _Ser3UTCR0) { + if (port->mapbase == _Ser3UTCR0) assign_h3600_egpio(IPAQ_EGPIO_RS232_ON, !state); - } } /* -- cgit v0.10.2 From 382c14a52d87f50111967f758fcb2d1f188f4415 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 11:10:59 +0100 Subject: ARM: 5798/1: SA1100: h3600: remove unused cruft from h3600.h PM_SUSPEND, PM_RESUME and machine_is_h3xxx() are not used anywhere in kernel (checked with git grep), so it's safe to remove them. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/include/mach/h3600.h b/arch/arm/mach-sa1100/include/mach/h3600.h index 2827faa..69f138c 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600.h +++ b/arch/arm/mach-sa1100/include/mach/h3600.h @@ -23,14 +23,6 @@ #ifndef _INCLUDE_H3600_H_ #define _INCLUDE_H3600_H_ -typedef int __bitwise pm_request_t; - -#define PM_SUSPEND ((__force pm_request_t) 1) /* enter D1-D3 */ -#define PM_RESUME ((__force pm_request_t) 2) /* enter D0 */ - -/* generalized support for H3xxx series Compaq Pocket PC's */ -#define machine_is_h3xxx() (machine_is_h3100() || machine_is_h3600()) - /* Physical memory regions corresponding to chip selects */ #define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) #define H3600_BANK_2_PHYS SA1100_CS2_PHYS -- cgit v0.10.2 From d0e6041efdcbde53a325d0cf0b5bd09931524146 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 11:11:00 +0100 Subject: ARM: 5799/1: SA1100: h3600: stop setting direction for LCD pins sa1100_fb driver handles this Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 5033c03..021cecb 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -464,9 +464,7 @@ static void __init h3600_map_io(void) GPCR = 0x0fffffff; /* All outputs are set low by default */ GPDR = GPIO_H3600_L3_CLOCK | GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | - GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0 | - GPIO_LDD15 | GPIO_LDD14 | GPIO_LDD13 | GPIO_LDD12 | - GPIO_LDD11 | GPIO_LDD10 | GPIO_LDD9 | GPIO_LDD8; + GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0; H3600_EGPIO = h3600_egpio; /* Maintains across sleep? */ assign_h3600_egpio = h3600_control_egpio; -- cgit v0.10.2 From 2a83709199adf4e89254e03981c0f9ff6558691f Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Sun, 6 Dec 2009 17:12:49 +0100 Subject: ARM: 5811/2: pcmcia: convert sa1100_h3600 driver to gpiolib Convert all operations with GPLR/GPCR/GPSR to gpiolibs calls. Also change all IRQ_GPIO* to gpio_to_irq(*GPIO*) Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c index 3a121ac69..97e5667 100644 --- a/drivers/pcmcia/sa1100_h3600.c +++ b/drivers/pcmcia/sa1100_h3600.c @@ -10,26 +10,78 @@ #include #include #include +#include #include #include #include #include +#include #include "sa1100_generic.h" static struct pcmcia_irqs irqs[] = { - { 0, IRQ_GPIO_H3600_PCMCIA_CD0, "PCMCIA CD0" }, - { 1, IRQ_GPIO_H3600_PCMCIA_CD1, "PCMCIA CD1" } + { .sock = 0, .str = "PCMCIA CD0" }, /* .irq will be filled later */ + { .sock = 1, .str = "PCMCIA CD1" } }; static int h3600_pcmcia_hw_init(struct soc_pcmcia_socket *skt) { - skt->socket.pci_irq = skt->nr ? IRQ_GPIO_H3600_PCMCIA_IRQ1 - : IRQ_GPIO_H3600_PCMCIA_IRQ0; + int err; + switch (skt->nr) { + case 0: + err = gpio_request(H3XXX_GPIO_PCMCIA_IRQ0, "PCMCIA IRQ0"); + if (err) + goto err00; + err = gpio_direction_input(H3XXX_GPIO_PCMCIA_IRQ0); + if (err) + goto err01; + skt->socket.pci_irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_IRQ0); + + err = gpio_request(H3XXX_GPIO_PCMCIA_CD0, "PCMCIA CD0"); + if (err) + goto err01; + err = gpio_direction_input(H3XXX_GPIO_PCMCIA_CD0); + if (err) + goto err02; + irqs[0].irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_CD0); + + err = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); + if (err) + goto err02; + break; + case 1: + err = gpio_request(H3XXX_GPIO_PCMCIA_IRQ1, "PCMCIA IRQ1"); + if (err) + goto err10; + err = gpio_direction_input(H3XXX_GPIO_PCMCIA_IRQ1); + if (err) + goto err11; + skt->socket.pci_irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_IRQ1); + + err = gpio_request(H3XXX_GPIO_PCMCIA_CD1, "PCMCIA CD1"); + if (err) + goto err11; + err = gpio_direction_input(H3XXX_GPIO_PCMCIA_CD1); + if (err) + goto err12; + irqs[1].irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_CD1); + + err = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); + if (err) + goto err12; + break; + } + return 0; + +err02: gpio_free(H3XXX_GPIO_PCMCIA_CD0); +err01: gpio_free(H3XXX_GPIO_PCMCIA_IRQ0); +err00: return err; - return soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); +err12: gpio_free(H3XXX_GPIO_PCMCIA_CD0); +err11: gpio_free(H3XXX_GPIO_PCMCIA_IRQ0); +err10: return err; } static void h3600_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) @@ -40,17 +92,25 @@ static void h3600_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 0); assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 0); assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 1); + switch (skt->nr) { + case 0: + gpio_free(H3XXX_GPIO_PCMCIA_CD0); + gpio_free(H3XXX_GPIO_PCMCIA_IRQ0); + break; + case 1: + gpio_free(H3XXX_GPIO_PCMCIA_CD1); + gpio_free(H3XXX_GPIO_PCMCIA_IRQ1); + break; + } } static void h3600_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *state) { - unsigned long levels = GPLR; - switch (skt->nr) { case 0: - state->detect = levels & GPIO_H3600_PCMCIA_CD0 ? 0 : 1; - state->ready = levels & GPIO_H3600_PCMCIA_IRQ0 ? 1 : 0; + state->detect = !gpio_get_value(H3XXX_GPIO_PCMCIA_CD0); + state->ready = !!gpio_get_value(H3XXX_GPIO_PCMCIA_IRQ0); state->bvd1 = 0; state->bvd2 = 0; state->wrprot = 0; /* Not available on H3600. */ @@ -59,8 +119,8 @@ h3600_pcmcia_socket_state(struct soc_pcmcia_socket *skt, struct pcmcia_state *st break; case 1: - state->detect = levels & GPIO_H3600_PCMCIA_CD1 ? 0 : 1; - state->ready = levels & GPIO_H3600_PCMCIA_IRQ1 ? 1 : 0; + state->detect = !gpio_get_value(H3XXX_GPIO_PCMCIA_CD1); + state->ready = !!gpio_get_value(H3XXX_GPIO_PCMCIA_IRQ1); state->bvd1 = 0; state->bvd2 = 0; state->wrprot = 0; /* Not available on H3600. */ -- cgit v0.10.2 From cf5a87d80a70958b0622042b0b447f61aca1caf8 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 11:58:35 +0100 Subject: ARM: 5812/1: SA1100: h3100/h3600: separate machine-specific LCD helpers h3100 and h3600 have different sets of LCD-controlling gpios, which mapped to the same "abstracted" EGPIO. As we plan to get rid of those abstracted egpios completely, we need to separate these helper functions. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 021cecb..1297c11 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -191,14 +191,6 @@ static void __init h3xxx_mach_init(void) sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); } -/* - * helper for sa1100fb - */ -static void h3xxx_lcd_power(int enable) -{ - assign_h3600_egpio(IPAQ_EGPIO_LCD_POWER, enable); -} - static struct map_desc h3600_io_desc[] __initdata = { { /* static memory bank 2 CS#2 */ .virtual = H3600_BANK_2_VIRT, @@ -240,7 +232,6 @@ static void __init h3xxx_map_io(void) PCFR = PCFR_OPDE; PSDR = 0; - sa1100fb_lcd_power = h3xxx_lcd_power; } /************************* H3100 *************************/ @@ -321,11 +312,21 @@ static void h3100_control_egpio(enum ipaq_egpio_type x, int setp) | GPIO_H3100_AUD_PWR_ON \ | GPIO_H3100_IR_ON \ | GPIO_H3100_IR_FSEL) +/* + * helper for sa1100fb + */ +static void h3100_lcd_power(int enable) +{ + assign_h3600_egpio(IPAQ_EGPIO_LCD_POWER, enable); +} + static void __init h3100_map_io(void) { h3xxx_map_io(); + sa1100fb_lcd_power = h3100_lcd_power; + /* Initialize h3100-specific values here */ GPCR = 0x0fffffff; /* All outputs are set low by default */ GPDR = GPIO_H3600_L3_CLOCK | @@ -455,10 +456,20 @@ static void h3600_control_egpio(enum ipaq_egpio_type x, int setp) } } +/* + * helper for sa1100fb + */ +static void h3600_lcd_power(int enable) +{ + assign_h3600_egpio(IPAQ_EGPIO_LCD_POWER, enable); +} + static void __init h3600_map_io(void) { h3xxx_map_io(); + sa1100fb_lcd_power = h3600_lcd_power; + /* Initialize h3600-specific values here */ GPCR = 0x0fffffff; /* All outputs are set low by default */ -- cgit v0.10.2 From 2eec62d7dbaa8ed05f318d88f938a86fcf274b34 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:00:00 +0100 Subject: ARM: 5813/1: SA1100: h3100/h3600: add htc-egpio driver It will be used for future conversion of assign_h3600_egpio calls to gpiolib. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 1297c11..429fdb0 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -25,10 +25,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -184,11 +186,54 @@ static struct sa1100_port_fns h3xxx_port_fns __initdata = { .set_wake = h3xxx_uart_set_wake, }; +/* + * EGPIO + */ + +static struct resource egpio_resources[] = { + [0] = { + .start = H3600_EGPIO_PHYS, + .end = H3600_EGPIO_PHYS + 0x4 - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct htc_egpio_chip egpio_chips[] = { + [0] = { + .reg_start = 0, + .gpio_base = H3XXX_EGPIO_BASE, + .num_gpios = 16, + .direction = HTC_EGPIO_OUTPUT, + .initial_values = 0x0080, /* H3XXX_EGPIO_RS232_ON */ + }, +}; + +static struct htc_egpio_platform_data egpio_info = { + .reg_width = 16, + .bus_width = 16, + .chip = egpio_chips, + .num_chips = ARRAY_SIZE(egpio_chips), +}; + +static struct platform_device h3xxx_egpio = { + .name = "htc-egpio", + .id = -1, + .resource = egpio_resources, + .num_resources = ARRAY_SIZE(egpio_resources), + .dev = { + .platform_data = &egpio_info, + }, +}; + +static struct platform_device *h3xxx_devices[] = { + &h3xxx_egpio, +}; static void __init h3xxx_mach_init(void) { sa1100_register_uart_fns(&h3xxx_port_fns); sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); + platform_add_devices(h3xxx_devices, ARRAY_SIZE(h3xxx_devices)); } static struct map_desc h3600_io_desc[] __initdata = { diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h index 18de674..ce80f1a 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h @@ -97,4 +97,28 @@ #define EGPIO_H3600_LVDD_ON (1 << 15) /* enable 9V and -6.5V to LCD. */ +/* gpiolib versions of EGPIOs */ + +/* H3100 / 3600 EGPIO pins */ +#define H3XXX_EGPIO_BASE (GPIO_MAX + 1) + +#define H3XXX_EGPIO_VPP_ON (H3XXX_EGPIO_BASE + 0) +#define H3XXX_EGPIO_CARD_RESET (H3XXX_EGPIO_BASE + 1) /* reset the attached pcmcia/compactflash card. active high. */ +#define H3XXX_EGPIO_OPT_RESET (H3XXX_EGPIO_BASE + 2) /* reset the attached option pack. active high. */ +#define H3XXX_EGPIO_CODEC_NRESET (H3XXX_EGPIO_BASE + 3) /* reset the onboard UDA1341. active low. */ +#define H3XXX_EGPIO_OPT_NVRAM_ON (H3XXX_EGPIO_BASE + 4) /* apply power to optionpack nvram, active high. */ +#define H3XXX_EGPIO_OPT_ON (H3XXX_EGPIO_BASE + 5) /* full power to option pack. active high. */ +#define H3XXX_EGPIO_LCD_ON (H3XXX_EGPIO_BASE + 6) /* enable 3.3V to LCD. active high. */ +#define H3XXX_EGPIO_RS232_ON (H3XXX_EGPIO_BASE + 7) /* UART3 transceiver force on. Active high. */ + +/* H3600 only EGPIO pins */ +#define H3600_EGPIO_LCD_PCI (H3XXX_EGPIO_BASE + 8) /* LCD control IC enable. active high. */ +#define H3600_EGPIO_IR_ON (H3XXX_EGPIO_BASE + 9) /* apply power to IR module. active high. */ +#define H3600_EGPIO_AUD_AMP_ON (H3XXX_EGPIO_BASE + 10) /* apply power to audio power amp. active high. */ +#define H3600_EGPIO_AUD_PWR_ON (H3XXX_EGPIO_BASE + 11) /* apply power to reset of audio circuit. active high. */ +#define H3600_EGPIO_QMUTE (H3XXX_EGPIO_BASE + 12) /* mute control for onboard UDA1341. active high. */ +#define H3600_EGPIO_IR_FSEL (H3XXX_EGPIO_BASE + 13) /* IR speed select: 1->fast, 0->slow */ +#define H3600_EGPIO_LCD_5V_ON (H3XXX_EGPIO_BASE + 14) /* enable 5V to LCD. active high. */ +#define H3600_EGPIO_LVDD_ON (H3XXX_EGPIO_BASE + 15) /* enable 9V and -6.5V to LCD. */ + #endif /* _INCLUDE_H3600_GPIO_H_ */ -- cgit v0.10.2 From 22f9740552b89c9f458f972f881d222b298ab165 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:02:28 +0100 Subject: ARM: 5814/1: SA1100: h3100/h3600: convert all users of assign_h3600_egpio to gpiolib Use of gpio_request/gpio_free in some callbacks may look ugly, but corresponding drivers (sa1100_serial and sa1100_fb) don't provide (yet) init/exit hooks and registering these gpios in *_mach_init is also not possible, because htc-gpio driver starts a bit later... Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 429fdb0..5e6011c 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -111,12 +111,32 @@ static struct mtd_partition h3xxx_partitions[] = { static void h3xxx_set_vpp(int vpp) { - assign_h3600_egpio(IPAQ_EGPIO_VPP_ON, vpp); + gpio_set_value(H3XXX_EGPIO_VPP_ON, vpp); +} + +static int h3xxx_flash_init(void) +{ + int err = gpio_request(H3XXX_EGPIO_VPP_ON, "Flash Vpp"); + if (err) + return err; + + err = gpio_direction_output(H3XXX_EGPIO_VPP_ON, 0); + if (err) + gpio_free(H3XXX_EGPIO_VPP_ON); + + return err; +} + +static void h3xxx_flash_exit(void) +{ + gpio_free(H3XXX_EGPIO_VPP_ON); } static struct flash_platform_data h3xxx_flash_data = { .map_name = "cfi_probe", .set_vpp = h3xxx_set_vpp, + .init = h3xxx_flash_init, + .exit = h3xxx_flash_exit, .parts = h3xxx_partitions, .nr_parts = ARRAY_SIZE(h3xxx_partitions), }; @@ -158,7 +178,10 @@ static u_int h3xxx_uart_get_mctrl(struct uart_port *port) static void h3xxx_uart_pm(struct uart_port *port, u_int state, u_int oldstate) { if (port->mapbase == _Ser3UTCR0) - assign_h3600_egpio(IPAQ_EGPIO_RS232_ON, !state); + if (!gpio_request(H3XXX_EGPIO_RS232_ON, "RS232 transceiver")) { + gpio_direction_output(H3XXX_EGPIO_RS232_ON, !state); + gpio_free(H3XXX_EGPIO_RS232_ON); + } } /* @@ -362,7 +385,11 @@ static void h3100_control_egpio(enum ipaq_egpio_type x, int setp) */ static void h3100_lcd_power(int enable) { - assign_h3600_egpio(IPAQ_EGPIO_LCD_POWER, enable); + if (!gpio_request(H3XXX_EGPIO_LCD_ON, "LCD ON")) { + gpio_set_value(H3100_GPIO_LCD_3V_ON, enable); + gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); + gpio_free(H3XXX_EGPIO_LCD_ON); + } } @@ -412,6 +439,7 @@ static struct gpio_default_state h3100_default_gpio[] = { { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, + { H3100_GPIO_LCD_3V_ON, GPIO_MODE_OUT0, "LCD 3v" }, }; static void __init h3100_mach_init(void) @@ -506,7 +534,25 @@ static void h3600_control_egpio(enum ipaq_egpio_type x, int setp) */ static void h3600_lcd_power(int enable) { - assign_h3600_egpio(IPAQ_EGPIO_LCD_POWER, enable); + if (gpio_request(H3XXX_EGPIO_LCD_ON, "LCD power")) + goto err1; + if (gpio_request(H3600_EGPIO_LCD_PCI, "LCD control")) + goto err2; + if (gpio_request(H3600_EGPIO_LCD_5V_ON, "LCD 5v")) + goto err3; + if (gpio_request(H3600_EGPIO_LVDD_ON, "LCD 9v/-6.5v")) + goto err4; + + gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); + gpio_direction_output(H3600_EGPIO_LCD_PCI, enable); + gpio_direction_output(H3600_EGPIO_LCD_5V_ON, enable); + gpio_direction_output(H3600_EGPIO_LVDD_ON, enable); + + gpio_free(H3600_EGPIO_LVDD_ON); +err4: gpio_free(H3600_EGPIO_LCD_5V_ON); +err3: gpio_free(H3600_EGPIO_LCD_PCI); +err2: gpio_free(H3XXX_EGPIO_LCD_ON); +err1: return; } static void __init h3600_map_io(void) @@ -531,18 +577,47 @@ static void __init h3600_map_io(void) */ static int h3600_irda_set_power(struct device *dev, unsigned int state) { - assign_h3600_egpio(IPAQ_EGPIO_IR_ON, state); + gpio_set_value(H3600_EGPIO_IR_ON, state); return 0; } static void h3600_irda_set_speed(struct device *dev, unsigned int speed) { - assign_h3600_egpio(IPAQ_EGPIO_IR_FSEL, !(speed < 4000000)); + gpio_set_value(H3600_EGPIO_IR_FSEL, !(speed < 4000000)); +} + +static int h3600_irda_startup(struct device *dev) +{ + int err = gpio_request(H3600_EGPIO_IR_ON, "IrDA power"); + if (err) + goto err1; + err = gpio_direction_output(H3600_EGPIO_IR_ON, 0); + if (err) + goto err2; + err = gpio_request(H3600_EGPIO_IR_FSEL, "IrDA fsel"); + if (err) + goto err2; + err = gpio_direction_output(H3600_EGPIO_IR_FSEL, 0); + if (err) + goto err3; + return 0; + +err3: gpio_free(H3600_EGPIO_IR_FSEL); +err2: gpio_free(H3600_EGPIO_IR_ON); +err1: return err; +} + +static void h3600_irda_shutdown(struct device *dev) +{ + gpio_free(H3600_EGPIO_IR_ON); + gpio_free(H3600_EGPIO_IR_FSEL); } static struct irda_platform_data h3600_irda_data = { .set_power = h3600_irda_set_power, .set_speed = h3600_irda_set_speed, + .startup = h3600_irda_startup, + .shutdown = h3600_irda_shutdown, }; static struct gpio_default_state h3600_default_gpio[] = { diff --git a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c index 97e5667..fd7af12 100644 --- a/drivers/pcmcia/sa1100_h3600.c +++ b/drivers/pcmcia/sa1100_h3600.c @@ -47,9 +47,33 @@ static int h3600_pcmcia_hw_init(struct soc_pcmcia_socket *skt) goto err02; irqs[0].irq = gpio_to_irq(H3XXX_GPIO_PCMCIA_CD0); - err = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); + err = gpio_request(H3XXX_EGPIO_OPT_NVRAM_ON, "OPT NVRAM ON"); if (err) goto err02; + err = gpio_direction_output(H3XXX_EGPIO_OPT_NVRAM_ON, 0); + if (err) + goto err03; + err = gpio_request(H3XXX_EGPIO_OPT_ON, "OPT ON"); + if (err) + goto err03; + err = gpio_direction_output(H3XXX_EGPIO_OPT_ON, 0); + if (err) + goto err04; + err = gpio_request(H3XXX_EGPIO_OPT_RESET, "OPT RESET"); + if (err) + goto err04; + err = gpio_direction_output(H3XXX_EGPIO_OPT_RESET, 0); + if (err) + goto err05; + err = gpio_request(H3XXX_EGPIO_CARD_RESET, "PCMCIA CARD RESET"); + if (err) + goto err05; + err = gpio_direction_output(H3XXX_EGPIO_CARD_RESET, 0); + if (err) + goto err06; + err = soc_pcmcia_request_irqs(skt, irqs, ARRAY_SIZE(irqs)); + if (err) + goto err06; break; case 1: err = gpio_request(H3XXX_GPIO_PCMCIA_IRQ1, "PCMCIA IRQ1"); @@ -75,6 +99,10 @@ static int h3600_pcmcia_hw_init(struct soc_pcmcia_socket *skt) } return 0; +err06: gpio_free(H3XXX_EGPIO_CARD_RESET); +err05: gpio_free(H3XXX_EGPIO_OPT_RESET); +err04: gpio_free(H3XXX_EGPIO_OPT_ON); +err03: gpio_free(H3XXX_EGPIO_OPT_NVRAM_ON); err02: gpio_free(H3XXX_GPIO_PCMCIA_CD0); err01: gpio_free(H3XXX_GPIO_PCMCIA_IRQ0); err00: return err; @@ -88,12 +116,17 @@ static void h3600_pcmcia_hw_shutdown(struct soc_pcmcia_socket *skt) { soc_pcmcia_free_irqs(skt, irqs, ARRAY_SIZE(irqs)); - /* Disable CF bus: */ - assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 0); - assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 0); - assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 1); switch (skt->nr) { case 0: + /* Disable CF bus: */ + gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 0); + gpio_set_value(H3XXX_EGPIO_OPT_ON, 0); + gpio_set_value(H3XXX_EGPIO_OPT_RESET, 1); + + gpio_free(H3XXX_EGPIO_CARD_RESET); + gpio_free(H3XXX_EGPIO_OPT_RESET); + gpio_free(H3XXX_EGPIO_OPT_ON); + gpio_free(H3XXX_EGPIO_OPT_NVRAM_ON); gpio_free(H3XXX_GPIO_PCMCIA_CD0); gpio_free(H3XXX_GPIO_PCMCIA_IRQ0); break; @@ -139,7 +172,7 @@ h3600_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_ return -1; } - assign_h3600_egpio(IPAQ_EGPIO_CARD_RESET, !!(state->flags & SS_RESET)); + gpio_set_value(H3XXX_EGPIO_CARD_RESET, !!(state->flags & SS_RESET)); /* Silently ignore Vpp, output enable, speaker enable. */ @@ -149,9 +182,9 @@ h3600_pcmcia_configure_socket(struct soc_pcmcia_socket *skt, const socket_state_ static void h3600_pcmcia_socket_init(struct soc_pcmcia_socket *skt) { /* Enable CF bus: */ - assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 1); - assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 1); - assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 0); + gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 1); + gpio_set_value(H3XXX_EGPIO_OPT_ON, 1); + gpio_set_value(H3XXX_EGPIO_OPT_RESET, 0); msleep(10); @@ -169,10 +202,10 @@ static void h3600_pcmcia_socket_suspend(struct soc_pcmcia_socket *skt) * socket 0 then socket 1. */ if (skt->nr == 1) { - assign_h3600_egpio(IPAQ_EGPIO_OPT_ON, 0); - assign_h3600_egpio(IPAQ_EGPIO_OPT_NVRAM_ON, 0); + gpio_set_value(H3XXX_EGPIO_OPT_ON, 0); + gpio_set_value(H3XXX_EGPIO_OPT_NVRAM_ON, 0); /* hmm, does this suck power? */ - assign_h3600_egpio(IPAQ_EGPIO_OPT_RESET, 1); + gpio_set_value(H3XXX_EGPIO_OPT_RESET, 1); } } -- cgit v0.10.2 From 766f0378a745596eae0bb625b9e3f05a7e7be476 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:03:52 +0100 Subject: ARM: 5815/1: SA1100: h3100/h3600: remove now unused assign_h3600_egpio handlers As all users of assign_h3600_egpio now converted to gpiolib, we can safely remove all assign_h3600_egpio handling code and definitions. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 5e6011c..6792dd1 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -49,9 +49,6 @@ #include "generic.h" -void (*assign_h3600_egpio)(enum ipaq_egpio_type x, int level); -EXPORT_SYMBOL(assign_h3600_egpio); - struct gpio_default_state { int gpio; int mode; @@ -306,72 +303,6 @@ static void __init h3xxx_map_io(void) #ifdef CONFIG_SA1100_H3100 -#define H3100_EGPIO (*(volatile unsigned int *)H3600_EGPIO_VIRT) -static unsigned int h3100_egpio = 0; - -static void h3100_control_egpio(enum ipaq_egpio_type x, int setp) -{ - unsigned int egpio = 0; - long gpio = 0; - unsigned long flags; - - switch (x) { - case IPAQ_EGPIO_LCD_POWER: - egpio |= EGPIO_H3600_LCD_ON; - gpio |= GPIO_H3100_LCD_3V_ON; - break; - case IPAQ_EGPIO_LCD_ENABLE: - break; - case IPAQ_EGPIO_CODEC_NRESET: - egpio |= EGPIO_H3600_CODEC_NRESET; - break; - case IPAQ_EGPIO_AUDIO_ON: - gpio |= GPIO_H3100_AUD_PWR_ON - | GPIO_H3100_AUD_ON; - break; - case IPAQ_EGPIO_QMUTE: - gpio |= GPIO_H3100_QMUTE; - break; - case IPAQ_EGPIO_OPT_NVRAM_ON: - egpio |= EGPIO_H3600_OPT_NVRAM_ON; - break; - case IPAQ_EGPIO_OPT_ON: - egpio |= EGPIO_H3600_OPT_ON; - break; - case IPAQ_EGPIO_CARD_RESET: - egpio |= EGPIO_H3600_CARD_RESET; - break; - case IPAQ_EGPIO_OPT_RESET: - egpio |= EGPIO_H3600_OPT_RESET; - break; - case IPAQ_EGPIO_IR_ON: - gpio |= GPIO_H3100_IR_ON; - break; - case IPAQ_EGPIO_IR_FSEL: - gpio |= GPIO_H3100_IR_FSEL; - break; - case IPAQ_EGPIO_RS232_ON: - egpio |= EGPIO_H3600_RS232_ON; - break; - case IPAQ_EGPIO_VPP_ON: - egpio |= EGPIO_H3600_VPP_ON; - break; - } - - if (egpio || gpio) { - local_irq_save(flags); - if (setp) { - h3100_egpio |= egpio; - GPSR = gpio; - } else { - h3100_egpio &= ~egpio; - GPCR = gpio; - } - H3100_EGPIO = h3100_egpio; - local_irq_restore(flags); - } -} - #define H3100_DIRECT_EGPIO (GPIO_H3100_BT_ON \ | GPIO_H3100_GPIO3 \ | GPIO_H3100_QMUTE \ @@ -409,9 +340,6 @@ static void __init h3100_map_io(void) /* Older bootldrs put GPIO2-9 in alternate mode on the assumption that they are used for video */ GAFR &= ~H3100_DIRECT_EGPIO; - - H3100_EGPIO = h3100_egpio; - assign_h3600_egpio = h3100_control_egpio; } /* @@ -465,70 +393,6 @@ MACHINE_END #ifdef CONFIG_SA1100_H3600 -#define H3600_EGPIO (*(volatile unsigned int *)H3600_EGPIO_VIRT) -static unsigned int h3600_egpio = EGPIO_H3600_RS232_ON; - -static void h3600_control_egpio(enum ipaq_egpio_type x, int setp) -{ - unsigned int egpio = 0; - unsigned long flags; - - switch (x) { - case IPAQ_EGPIO_LCD_POWER: - egpio |= EGPIO_H3600_LCD_ON | - EGPIO_H3600_LCD_PCI | - EGPIO_H3600_LCD_5V_ON | - EGPIO_H3600_LVDD_ON; - break; - case IPAQ_EGPIO_LCD_ENABLE: - break; - case IPAQ_EGPIO_CODEC_NRESET: - egpio |= EGPIO_H3600_CODEC_NRESET; - break; - case IPAQ_EGPIO_AUDIO_ON: - egpio |= EGPIO_H3600_AUD_AMP_ON | - EGPIO_H3600_AUD_PWR_ON; - break; - case IPAQ_EGPIO_QMUTE: - egpio |= EGPIO_H3600_QMUTE; - break; - case IPAQ_EGPIO_OPT_NVRAM_ON: - egpio |= EGPIO_H3600_OPT_NVRAM_ON; - break; - case IPAQ_EGPIO_OPT_ON: - egpio |= EGPIO_H3600_OPT_ON; - break; - case IPAQ_EGPIO_CARD_RESET: - egpio |= EGPIO_H3600_CARD_RESET; - break; - case IPAQ_EGPIO_OPT_RESET: - egpio |= EGPIO_H3600_OPT_RESET; - break; - case IPAQ_EGPIO_IR_ON: - egpio |= EGPIO_H3600_IR_ON; - break; - case IPAQ_EGPIO_IR_FSEL: - egpio |= EGPIO_H3600_IR_FSEL; - break; - case IPAQ_EGPIO_RS232_ON: - egpio |= EGPIO_H3600_RS232_ON; - break; - case IPAQ_EGPIO_VPP_ON: - egpio |= EGPIO_H3600_VPP_ON; - break; - } - - if (egpio) { - local_irq_save(flags); - if (setp) - h3600_egpio |= egpio; - else - h3600_egpio &= ~egpio; - H3600_EGPIO = h3600_egpio; - local_irq_restore(flags); - } -} - /* * helper for sa1100fb */ @@ -567,9 +431,6 @@ static void __init h3600_map_io(void) GPDR = GPIO_H3600_L3_CLOCK | GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0; - - H3600_EGPIO = h3600_egpio; /* Maintains across sleep? */ - assign_h3600_egpio = h3600_control_egpio; } /* diff --git a/arch/arm/mach-sa1100/include/mach/h3600.h b/arch/arm/mach-sa1100/include/mach/h3600.h index 69f138c..19d7fe1 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600.h +++ b/arch/arm/mach-sa1100/include/mach/h3600.h @@ -66,27 +66,4 @@ #define IRQ_GPIO_H3600_OPT_IRQ IRQ_GPIO24 #define IRQ_GPIO_H3600_COM_CTS IRQ_GPIO25 - -#ifndef __ASSEMBLY__ - -enum ipaq_egpio_type { - IPAQ_EGPIO_LCD_POWER, /* Power to the LCD panel */ - IPAQ_EGPIO_CODEC_NRESET, /* Clear to reset the audio codec (remember to return high) */ - IPAQ_EGPIO_AUDIO_ON, /* Audio power */ - IPAQ_EGPIO_QMUTE, /* Audio muting */ - IPAQ_EGPIO_OPT_NVRAM_ON, /* Non-volatile RAM on extension sleeves (SPI interface) */ - IPAQ_EGPIO_OPT_ON, /* Power to extension sleeves */ - IPAQ_EGPIO_CARD_RESET, /* Reset PCMCIA cards on extension sleeve (???) */ - IPAQ_EGPIO_OPT_RESET, /* Reset option pack (???) */ - IPAQ_EGPIO_IR_ON, /* IR sensor/emitter power */ - IPAQ_EGPIO_IR_FSEL, /* IR speed selection 1->fast, 0->slow */ - IPAQ_EGPIO_RS232_ON, /* Maxim RS232 chip power */ - IPAQ_EGPIO_VPP_ON, /* Turn on power to flash programming */ - IPAQ_EGPIO_LCD_ENABLE, /* Enable/disable LCD controller */ -}; - -extern void (*assign_h3600_egpio)(enum ipaq_egpio_type x, int level); - -#endif /* ASSEMBLY */ - #endif /* _INCLUDE_H3600_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h index ce80f1a..6cfbc64 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h @@ -76,28 +76,6 @@ #define IRQ_GPIO_H3600_ACTION_BUTTON IRQ_GPIO18 #define IRQ_GPIO_H3600_OPT_DET IRQ_GPIO27 -/* H3100 / 3600 EGPIO pins */ -#define EGPIO_H3600_VPP_ON (1 << 0) -#define EGPIO_H3600_CARD_RESET (1 << 1) /* reset the attached pcmcia/compactflash card. active high. */ -#define EGPIO_H3600_OPT_RESET (1 << 2) /* reset the attached option pack. active high. */ -#define EGPIO_H3600_CODEC_NRESET (1 << 3) /* reset the onboard UDA1341. active low. */ -#define EGPIO_H3600_OPT_NVRAM_ON (1 << 4) /* apply power to optionpack nvram, active high. */ -#define EGPIO_H3600_OPT_ON (1 << 5) /* full power to option pack. active high. */ -#define EGPIO_H3600_LCD_ON (1 << 6) /* enable 3.3V to LCD. active high. */ -#define EGPIO_H3600_RS232_ON (1 << 7) /* UART3 transceiver force on. Active high. */ - -/* H3600 only EGPIO pins */ -#define EGPIO_H3600_LCD_PCI (1 << 8) /* LCD control IC enable. active high. */ -#define EGPIO_H3600_IR_ON (1 << 9) /* apply power to IR module. active high. */ -#define EGPIO_H3600_AUD_AMP_ON (1 << 10) /* apply power to audio power amp. active high. */ -#define EGPIO_H3600_AUD_PWR_ON (1 << 11) /* apply power to reset of audio circuit. active high. */ -#define EGPIO_H3600_QMUTE (1 << 12) /* mute control for onboard UDA1341. active high. */ -#define EGPIO_H3600_IR_FSEL (1 << 13) /* IR speed select: 1->fast, 0->slow */ -#define EGPIO_H3600_LCD_5V_ON (1 << 14) /* enable 5V to LCD. active high. */ -#define EGPIO_H3600_LVDD_ON (1 << 15) /* enable 9V and -6.5V to LCD. */ - - -/* gpiolib versions of EGPIOs */ /* H3100 / 3600 EGPIO pins */ #define H3XXX_EGPIO_BASE (GPIO_MAX + 1) -- cgit v0.10.2 From ca912b17c0af886d9090449d5933ac6ca9a7a20f Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:05:28 +0100 Subject: ARM: 5816/1: SA1100: h3600: remove IRQ_GPIO_* definitions As all the remaining users of these definitions (in pcmcia/sa1100_h3600 driver) were converted to gpio_to_irq(), they can be safely removed. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/include/mach/h3600.h b/arch/arm/mach-sa1100/include/mach/h3600.h index 19d7fe1..f4911db 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600.h +++ b/arch/arm/mach-sa1100/include/mach/h3600.h @@ -57,13 +57,4 @@ #define GPIO_H3600_COM_CTS GPIO_GPIO (25) #define GPIO_H3600_COM_RTS GPIO_GPIO (26) -#define IRQ_GPIO_H3600_NPOWER_BUTTON IRQ_GPIO0 -#define IRQ_GPIO_H3600_PCMCIA_CD1 IRQ_GPIO10 -#define IRQ_GPIO_H3600_PCMCIA_IRQ1 IRQ_GPIO11 -#define IRQ_GPIO_H3600_PCMCIA_CD0 IRQ_GPIO17 -#define IRQ_GPIO_H3600_PCMCIA_IRQ0 IRQ_GPIO21 -#define IRQ_GPIO_H3600_COM_DCD IRQ_GPIO23 -#define IRQ_GPIO_H3600_OPT_IRQ IRQ_GPIO24 -#define IRQ_GPIO_H3600_COM_CTS IRQ_GPIO25 - #endif /* _INCLUDE_H3600_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h index 6cfbc64..0c0bfa5 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h @@ -71,11 +71,6 @@ #define GPIO_H3600_OPT_LOCK GPIO_GPIO (22) #define GPIO_H3600_OPT_DET GPIO_GPIO (27) -/****************************************************/ - -#define IRQ_GPIO_H3600_ACTION_BUTTON IRQ_GPIO18 -#define IRQ_GPIO_H3600_OPT_DET IRQ_GPIO27 - /* H3100 / 3600 EGPIO pins */ #define H3XXX_EGPIO_BASE (GPIO_MAX + 1) -- cgit v0.10.2 From 51834901faee2850e55865589cefd7588202cbf7 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:07:11 +0100 Subject: ARM: 5817/1: SA1100: h3100/h3600: configure all unused gpios as inputs After conversion to gpiolib there's still some GPIOs left, that get configured in *_mach_init() as outputs (using direct operations on GPCR/GPDR registers), but otherwise unused. These GPIOs are mainly sound related and should be configured by corresponding driver once it is written. Drop this initialisation and configure all GPIOs as input. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 6792dd1..3d52190 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -297,20 +297,14 @@ static void __init h3xxx_map_io(void) PCFR = PCFR_OPDE; PSDR = 0; + GPCR = 0x0fffffff; /* All outputs are set low by default */ + GPDR = 0; /* Configure all GPIOs as input */ } /************************* H3100 *************************/ #ifdef CONFIG_SA1100_H3100 -#define H3100_DIRECT_EGPIO (GPIO_H3100_BT_ON \ - | GPIO_H3100_GPIO3 \ - | GPIO_H3100_QMUTE \ - | GPIO_H3100_LCD_3V_ON \ - | GPIO_H3100_AUD_ON \ - | GPIO_H3100_AUD_PWR_ON \ - | GPIO_H3100_IR_ON \ - | GPIO_H3100_IR_FSEL) /* * helper for sa1100fb */ @@ -330,16 +324,9 @@ static void __init h3100_map_io(void) sa1100fb_lcd_power = h3100_lcd_power; - /* Initialize h3100-specific values here */ - GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = GPIO_H3600_L3_CLOCK | - GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | - GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0 | - H3100_DIRECT_EGPIO; - /* Older bootldrs put GPIO2-9 in alternate mode on the assumption that they are used for video */ - GAFR &= ~H3100_DIRECT_EGPIO; + GAFR &= ~0x000001fb; } /* @@ -424,13 +411,6 @@ static void __init h3600_map_io(void) h3xxx_map_io(); sa1100fb_lcd_power = h3600_lcd_power; - - /* Initialize h3600-specific values here */ - - GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = GPIO_H3600_L3_CLOCK | - GPIO_H3600_L3_MODE | GPIO_H3600_L3_DATA | - GPIO_H3600_CLK_SET1 | GPIO_H3600_CLK_SET0; } /* -- cgit v0.10.2 From 4c88a5c20f59c07afc4536c219a9c1f196cec36d Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:07:47 +0100 Subject: ARM: 5818/1: SA1100: h3100/h3600: drop old GPIO definitions As all existing code was converted to gpiolib, drop no more used pre-gpiolib (bit-shifted) GPIO definintions. Supply new gpiolib-friendly definitions for GPIOs which don't have them yet. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/include/mach/h3600.h b/arch/arm/mach-sa1100/include/mach/h3600.h index f4911db..e420422 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600.h +++ b/arch/arm/mach-sa1100/include/mach/h3600.h @@ -33,28 +33,4 @@ #define H3600_BANK_2_VIRT 0xf1000000 #define H3600_BANK_4_VIRT 0xf3800000 -/* - Machine-independent GPIO definitions - --- these are common across all current iPAQ platforms -*/ - -#define GPIO_H3600_NPOWER_BUTTON GPIO_GPIO (0) /* Also known as the "off button" */ - -#define GPIO_H3600_PCMCIA_CD1 GPIO_GPIO (10) -#define GPIO_H3600_PCMCIA_IRQ1 GPIO_GPIO (11) - -/* UDA1341 L3 Interface */ -#define GPIO_H3600_L3_DATA GPIO_GPIO (14) -#define GPIO_H3600_L3_MODE GPIO_GPIO (15) -#define GPIO_H3600_L3_CLOCK GPIO_GPIO (16) - -#define GPIO_H3600_PCMCIA_CD0 GPIO_GPIO (17) -#define GPIO_H3600_SYS_CLK GPIO_GPIO (19) -#define GPIO_H3600_PCMCIA_IRQ0 GPIO_GPIO (21) - -#define GPIO_H3600_COM_DCD GPIO_GPIO (23) -#define GPIO_H3600_OPT_IRQ GPIO_GPIO (24) -#define GPIO_H3600_COM_CTS GPIO_GPIO (25) -#define GPIO_H3600_COM_RTS GPIO_GPIO (26) - #endif /* _INCLUDE_H3600_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h index 0c0bfa5..2537f35 100644 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h @@ -30,6 +30,7 @@ #define H3XXX_GPIO_PCMCIA_CD1 10 #define H3XXX_GPIO_PCMCIA_IRQ1 11 #define H3XXX_GPIO_PCMCIA_CD0 17 +#define H3XXX_GPIO_ACTION_BUTTON 18 #define H3XXX_GPIO_SYS_CLK 19 #define H3XXX_GPIO_PCMCIA_IRQ0 21 #define H3XXX_GPIO_COM_DCD 23 @@ -37,23 +38,8 @@ #define H3XXX_GPIO_COM_CTS 25 #define H3XXX_GPIO_COM_RTS 26 -/* - * GPIO lines that are common across ALL iPAQ models are in "h3600.h" - * This file contains machine-specific definitions - */ +/* machine-specific gpios */ -#define GPIO_H3600_SUSPEND GPIO_GPIO (0) -/* GPIO[2:9] used by LCD on H3600/3800, used as GPIO on H3100 */ -#define GPIO_H3100_BT_ON GPIO_GPIO (2) -#define GPIO_H3100_GPIO3 GPIO_GPIO (3) -#define GPIO_H3100_QMUTE GPIO_GPIO (4) -#define GPIO_H3100_LCD_3V_ON GPIO_GPIO (5) -#define GPIO_H3100_AUD_ON GPIO_GPIO (6) -#define GPIO_H3100_AUD_PWR_ON GPIO_GPIO (7) -#define GPIO_H3100_IR_ON GPIO_GPIO (8) -#define GPIO_H3100_IR_FSEL GPIO_GPIO (9) - -/* gpiolib versions of the above */ #define H3100_GPIO_BT_ON 2 #define H3100_GPIO_QMUTE 4 #define H3100_GPIO_LCD_3V_ON 5 @@ -62,14 +48,11 @@ #define H3100_GPIO_IR_ON 8 #define H3100_GPIO_IR_FSEL 9 -/* for H3600, audio sample rate clock generator */ -#define GPIO_H3600_CLK_SET0 GPIO_GPIO (12) -#define GPIO_H3600_CLK_SET1 GPIO_GPIO (13) - -#define GPIO_H3600_ACTION_BUTTON GPIO_GPIO (18) -#define GPIO_H3600_SOFT_RESET GPIO_GPIO (20) /* Also known as BATT_FAULT */ -#define GPIO_H3600_OPT_LOCK GPIO_GPIO (22) -#define GPIO_H3600_OPT_DET GPIO_GPIO (27) +#define H3600_GPIO_CLK_SET0 12 /* audio sample rate clock generator */ +#define H3600_GPIO_CLK_SET1 13 +#define H3600_GPIO_SOFT_RESET 20 /* also known as BATT_FAULT */ +#define H3600_GPIO_OPT_LOCK 22 +#define H3600_GPIO_OPT_DET 27 /* H3100 / 3600 EGPIO pins */ -- cgit v0.10.2 From 8715b29db2787f7c70f662b7b4d5b01017c61948 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:09:25 +0100 Subject: ARM: 5819/1: SA1100: h3100/h3600: merge h3600.h and h3600_gpio.h into h3xxx.h Combine both headers into one, rename to h3xxx.h and change all users accordingly. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 3d52190..dd39f90 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -44,8 +44,7 @@ #include #include -#include -#include +#include #include "generic.h" diff --git a/arch/arm/mach-sa1100/include/mach/h3600.h b/arch/arm/mach-sa1100/include/mach/h3600.h deleted file mode 100644 index e420422..0000000 --- a/arch/arm/mach-sa1100/include/mach/h3600.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * - * Definitions for H3600 Handheld Computer - * - * Copyright 2000 Compaq Computer Corporation. - * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * - */ - -#ifndef _INCLUDE_H3600_H_ -#define _INCLUDE_H3600_H_ - -/* Physical memory regions corresponding to chip selects */ -#define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) -#define H3600_BANK_2_PHYS SA1100_CS2_PHYS -#define H3600_BANK_4_PHYS SA1100_CS4_PHYS - -/* Virtual memory regions corresponding to chip selects 2 & 4 (used on sleeves) */ -#define H3600_EGPIO_VIRT 0xf0000000 -#define H3600_BANK_2_VIRT 0xf1000000 -#define H3600_BANK_4_VIRT 0xf3800000 - -#endif /* _INCLUDE_H3600_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h b/arch/arm/mach-sa1100/include/mach/h3600_gpio.h deleted file mode 100644 index 2537f35..0000000 --- a/arch/arm/mach-sa1100/include/mach/h3600_gpio.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * - * Definitions for H3600 Handheld Computer - * - * Copyright 2000 Compaq Computer Corporation. - * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * - */ - -#ifndef _INCLUDE_H3600_GPIO_H_ -#define _INCLUDE_H3600_GPIO_H_ - -/* - * gpiolib numbers for all iPAQs - */ -#define H3XXX_GPIO_PWR_BUTTON 0 -#define H3XXX_GPIO_PCMCIA_CD1 10 -#define H3XXX_GPIO_PCMCIA_IRQ1 11 -#define H3XXX_GPIO_PCMCIA_CD0 17 -#define H3XXX_GPIO_ACTION_BUTTON 18 -#define H3XXX_GPIO_SYS_CLK 19 -#define H3XXX_GPIO_PCMCIA_IRQ0 21 -#define H3XXX_GPIO_COM_DCD 23 -#define H3XXX_GPIO_OPTION 24 -#define H3XXX_GPIO_COM_CTS 25 -#define H3XXX_GPIO_COM_RTS 26 - -/* machine-specific gpios */ - -#define H3100_GPIO_BT_ON 2 -#define H3100_GPIO_QMUTE 4 -#define H3100_GPIO_LCD_3V_ON 5 -#define H3100_GPIO_AUD_ON 6 -#define H3100_GPIO_AUD_PWR_ON 7 -#define H3100_GPIO_IR_ON 8 -#define H3100_GPIO_IR_FSEL 9 - -#define H3600_GPIO_CLK_SET0 12 /* audio sample rate clock generator */ -#define H3600_GPIO_CLK_SET1 13 -#define H3600_GPIO_SOFT_RESET 20 /* also known as BATT_FAULT */ -#define H3600_GPIO_OPT_LOCK 22 -#define H3600_GPIO_OPT_DET 27 - - -/* H3100 / 3600 EGPIO pins */ -#define H3XXX_EGPIO_BASE (GPIO_MAX + 1) - -#define H3XXX_EGPIO_VPP_ON (H3XXX_EGPIO_BASE + 0) -#define H3XXX_EGPIO_CARD_RESET (H3XXX_EGPIO_BASE + 1) /* reset the attached pcmcia/compactflash card. active high. */ -#define H3XXX_EGPIO_OPT_RESET (H3XXX_EGPIO_BASE + 2) /* reset the attached option pack. active high. */ -#define H3XXX_EGPIO_CODEC_NRESET (H3XXX_EGPIO_BASE + 3) /* reset the onboard UDA1341. active low. */ -#define H3XXX_EGPIO_OPT_NVRAM_ON (H3XXX_EGPIO_BASE + 4) /* apply power to optionpack nvram, active high. */ -#define H3XXX_EGPIO_OPT_ON (H3XXX_EGPIO_BASE + 5) /* full power to option pack. active high. */ -#define H3XXX_EGPIO_LCD_ON (H3XXX_EGPIO_BASE + 6) /* enable 3.3V to LCD. active high. */ -#define H3XXX_EGPIO_RS232_ON (H3XXX_EGPIO_BASE + 7) /* UART3 transceiver force on. Active high. */ - -/* H3600 only EGPIO pins */ -#define H3600_EGPIO_LCD_PCI (H3XXX_EGPIO_BASE + 8) /* LCD control IC enable. active high. */ -#define H3600_EGPIO_IR_ON (H3XXX_EGPIO_BASE + 9) /* apply power to IR module. active high. */ -#define H3600_EGPIO_AUD_AMP_ON (H3XXX_EGPIO_BASE + 10) /* apply power to audio power amp. active high. */ -#define H3600_EGPIO_AUD_PWR_ON (H3XXX_EGPIO_BASE + 11) /* apply power to reset of audio circuit. active high. */ -#define H3600_EGPIO_QMUTE (H3XXX_EGPIO_BASE + 12) /* mute control for onboard UDA1341. active high. */ -#define H3600_EGPIO_IR_FSEL (H3XXX_EGPIO_BASE + 13) /* IR speed select: 1->fast, 0->slow */ -#define H3600_EGPIO_LCD_5V_ON (H3XXX_EGPIO_BASE + 14) /* enable 5V to LCD. active high. */ -#define H3600_EGPIO_LVDD_ON (H3XXX_EGPIO_BASE + 15) /* enable 9V and -6.5V to LCD. */ - -#endif /* _INCLUDE_H3600_GPIO_H_ */ diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h new file mode 100644 index 0000000..7f1e130 --- /dev/null +++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h @@ -0,0 +1,90 @@ +/* + * + * Definitions for H3600 Handheld Computer + * + * Copyright 2000 Compaq Computer Corporation. + * + * Use consistent with the GNU GPL is permitted, + * provided that this copyright notice is + * preserved in its entirety in all copies and derived works. + * + * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, + * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS + * FITNESS FOR ANY PARTICULAR PURPOSE. + * + * Author: Jamey Hicks. + * + * History: + * + * 2001-10-?? Andrew Christian Added support for iPAQ H3800 + * + */ + +#ifndef _INCLUDE_H3XXX_H_ +#define _INCLUDE_H3XXX_H_ + +/* Physical memory regions corresponding to chip selects */ +#define H3600_EGPIO_PHYS (SA1100_CS5_PHYS + 0x01000000) +#define H3600_BANK_2_PHYS SA1100_CS2_PHYS +#define H3600_BANK_4_PHYS SA1100_CS4_PHYS + +/* Virtual memory regions corresponding to chip selects 2 & 4 (used on sleeves) */ +#define H3600_EGPIO_VIRT 0xf0000000 +#define H3600_BANK_2_VIRT 0xf1000000 +#define H3600_BANK_4_VIRT 0xf3800000 + +/* + * gpiolib numbers for all iPAQs + */ +#define H3XXX_GPIO_PWR_BUTTON 0 +#define H3XXX_GPIO_PCMCIA_CD1 10 +#define H3XXX_GPIO_PCMCIA_IRQ1 11 +#define H3XXX_GPIO_PCMCIA_CD0 17 +#define H3XXX_GPIO_ACTION_BUTTON 18 +#define H3XXX_GPIO_SYS_CLK 19 +#define H3XXX_GPIO_PCMCIA_IRQ0 21 +#define H3XXX_GPIO_COM_DCD 23 +#define H3XXX_GPIO_OPTION 24 +#define H3XXX_GPIO_COM_CTS 25 +#define H3XXX_GPIO_COM_RTS 26 + +/* machine-specific gpios */ + +#define H3100_GPIO_BT_ON 2 +#define H3100_GPIO_QMUTE 4 +#define H3100_GPIO_LCD_3V_ON 5 +#define H3100_GPIO_AUD_ON 6 +#define H3100_GPIO_AUD_PWR_ON 7 +#define H3100_GPIO_IR_ON 8 +#define H3100_GPIO_IR_FSEL 9 + +#define H3600_GPIO_CLK_SET0 12 /* audio sample rate clock generator */ +#define H3600_GPIO_CLK_SET1 13 +#define H3600_GPIO_SOFT_RESET 20 /* also known as BATT_FAULT */ +#define H3600_GPIO_OPT_LOCK 22 +#define H3600_GPIO_OPT_DET 27 + + +/* H3100 / 3600 EGPIO pins */ +#define H3XXX_EGPIO_BASE (GPIO_MAX + 1) + +#define H3XXX_EGPIO_VPP_ON (H3XXX_EGPIO_BASE + 0) +#define H3XXX_EGPIO_CARD_RESET (H3XXX_EGPIO_BASE + 1) /* reset the attached pcmcia/compactflash card. active high. */ +#define H3XXX_EGPIO_OPT_RESET (H3XXX_EGPIO_BASE + 2) /* reset the attached option pack. active high. */ +#define H3XXX_EGPIO_CODEC_NRESET (H3XXX_EGPIO_BASE + 3) /* reset the onboard UDA1341. active low. */ +#define H3XXX_EGPIO_OPT_NVRAM_ON (H3XXX_EGPIO_BASE + 4) /* apply power to optionpack nvram, active high. */ +#define H3XXX_EGPIO_OPT_ON (H3XXX_EGPIO_BASE + 5) /* full power to option pack. active high. */ +#define H3XXX_EGPIO_LCD_ON (H3XXX_EGPIO_BASE + 6) /* enable 3.3V to LCD. active high. */ +#define H3XXX_EGPIO_RS232_ON (H3XXX_EGPIO_BASE + 7) /* UART3 transceiver force on. Active high. */ + +/* H3600 only EGPIO pins */ +#define H3600_EGPIO_LCD_PCI (H3XXX_EGPIO_BASE + 8) /* LCD control IC enable. active high. */ +#define H3600_EGPIO_IR_ON (H3XXX_EGPIO_BASE + 9) /* apply power to IR module. active high. */ +#define H3600_EGPIO_AUD_AMP_ON (H3XXX_EGPIO_BASE + 10) /* apply power to audio power amp. active high. */ +#define H3600_EGPIO_AUD_PWR_ON (H3XXX_EGPIO_BASE + 11) /* apply power to reset of audio circuit. active high. */ +#define H3600_EGPIO_QMUTE (H3XXX_EGPIO_BASE + 12) /* mute control for onboard UDA1341. active high. */ +#define H3600_EGPIO_IR_FSEL (H3XXX_EGPIO_BASE + 13) /* IR speed select: 1->fast, 0->slow */ +#define H3600_EGPIO_LCD_5V_ON (H3XXX_EGPIO_BASE + 14) /* enable 5V to LCD. active high. */ +#define H3600_EGPIO_LVDD_ON (H3XXX_EGPIO_BASE + 15) /* enable 9V and -6.5V to LCD. */ + +#endif /* _INCLUDE_H3XXX_H_ */ diff --git a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c index fd7af12..8706d42 100644 --- a/drivers/pcmcia/sa1100_h3600.c +++ b/drivers/pcmcia/sa1100_h3600.c @@ -15,8 +15,7 @@ #include #include #include -#include -#include +#include #include "sa1100_generic.h" -- cgit v0.10.2 From 86e5e38c46b1d188c897f131d3f015ca73677f03 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:10:55 +0100 Subject: ARM: 5820/1: SA1100: h3100/h3600: split h3600.c Split common h3600.c into three separate files: h3100.c, h3600.c and h3xxx.c (the latter contains common code for h3100/h3600) Copyright boilerplates and #includes are copied intact and will be cleaned up later. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile index bb7b819..89349c1 100644 --- a/arch/arm/mach-sa1100/Makefile +++ b/arch/arm/mach-sa1100/Makefile @@ -25,8 +25,8 @@ led-$(CONFIG_SA1100_CERF) += leds-cerf.o obj-$(CONFIG_SA1100_COLLIE) += collie.o -obj-$(CONFIG_SA1100_H3100) += h3600.o -obj-$(CONFIG_SA1100_H3600) += h3600.o +obj-$(CONFIG_SA1100_H3100) += h3100.o h3xxx.o +obj-$(CONFIG_SA1100_H3600) += h3600.o h3xxx.o obj-$(CONFIG_SA1100_HACKKIT) += hackkit.o led-$(CONFIG_SA1100_HACKKIT) += leds-hackkit.o diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c new file mode 100644 index 0000000..4c7778e --- /dev/null +++ b/arch/arm/mach-sa1100/h3100.c @@ -0,0 +1,119 @@ +/* + * Hardware definitions for Compaq iPAQ H3xxx Handheld Computers + * + * Copyright 2000,1 Compaq Computer Corporation. + * + * Use consistent with the GNU GPL is permitted, + * provided that this copyright notice is + * preserved in its entirety in all copies and derived works. + * + * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, + * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS + * FITNESS FOR ANY PARTICULAR PURPOSE. + * + * Author: Jamey Hicks. + * + * History: + * + * 2001-10-?? Andrew Christian Added support for iPAQ H3800 + * and abstracted EGPIO interface. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "generic.h" + +/* + * helper for sa1100fb + */ +static void h3100_lcd_power(int enable) +{ + if (!gpio_request(H3XXX_EGPIO_LCD_ON, "LCD ON")) { + gpio_set_value(H3100_GPIO_LCD_3V_ON, enable); + gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); + gpio_free(H3XXX_EGPIO_LCD_ON); + } +} + + +static void __init h3100_map_io(void) +{ + h3xxx_map_io(); + + sa1100fb_lcd_power = h3100_lcd_power; + + /* Older bootldrs put GPIO2-9 in alternate mode on the + assumption that they are used for video */ + GAFR &= ~0x000001fb; +} + +/* + * This turns the IRDA power on or off on the Compaq H3100 + */ +static int h3100_irda_set_power(struct device *dev, unsigned int state) +{ + gpio_set_value(H3100_GPIO_IR_ON, state); + return 0; +} + +static void h3100_irda_set_speed(struct device *dev, unsigned int speed) +{ + gpio_set_value(H3100_GPIO_IR_FSEL, !(speed < 4000000)); +} + +static struct irda_platform_data h3100_irda_data = { + .set_power = h3100_irda_set_power, + .set_speed = h3100_irda_set_speed, +}; + +static struct gpio_default_state h3100_default_gpio[] = { + { H3100_GPIO_IR_ON, GPIO_MODE_OUT0, "IrDA power" }, + { H3100_GPIO_IR_FSEL, GPIO_MODE_OUT0, "IrDA fsel" }, + { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, + { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, + { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, + { H3100_GPIO_LCD_3V_ON, GPIO_MODE_OUT0, "LCD 3v" }, +}; + +static void __init h3100_mach_init(void) +{ + h3xxx_init_gpio(h3100_default_gpio, ARRAY_SIZE(h3100_default_gpio)); + h3xxx_mach_init(); + sa11x0_register_irda(&h3100_irda_data); +} + +MACHINE_START(H3100, "Compaq iPAQ H3100") + .phys_io = 0x80000000, + .io_pg_offst = ((0xf8000000) >> 18) & 0xfffc, + .boot_params = 0xc0000100, + .map_io = h3100_map_io, + .init_irq = sa1100_init_irq, + .timer = &sa1100_timer, + .init_machine = h3100_mach_init, +MACHINE_END + diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index dd39f90..65ab286 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -48,337 +48,6 @@ #include "generic.h" -struct gpio_default_state { - int gpio; - int mode; - const char *name; -}; - -#define GPIO_MODE_IN -1 -#define GPIO_MODE_OUT0 0 -#define GPIO_MODE_OUT1 1 - -static void h3xxx_init_gpio(struct gpio_default_state *s, size_t n) -{ - while (n--) { - const char *name = s->name; - int err; - - if (!name) - name = "[init]"; - err = gpio_request(s->gpio, name); - if (err) { - printk(KERN_ERR "gpio%u: unable to request: %d\n", - s->gpio, err); - continue; - } - if (s->mode >= 0) { - err = gpio_direction_output(s->gpio, s->mode); - } else { - err = gpio_direction_input(s->gpio); - } - if (err) { - printk(KERN_ERR "gpio%u: unable to set direction: %d\n", - s->gpio, err); - continue; - } - if (!s->name) - gpio_free(s->gpio); - s++; - } -} - - -/* - * H3xxx flash support - */ -static struct mtd_partition h3xxx_partitions[] = { - { - .name = "H3XXX boot firmware", - .size = 0x00040000, - .offset = 0, - .mask_flags = MTD_WRITEABLE, /* force read-only */ - }, { - .name = "H3XXX rootfs", - .size = MTDPART_SIZ_FULL, - .offset = 0x00040000, - } -}; - -static void h3xxx_set_vpp(int vpp) -{ - gpio_set_value(H3XXX_EGPIO_VPP_ON, vpp); -} - -static int h3xxx_flash_init(void) -{ - int err = gpio_request(H3XXX_EGPIO_VPP_ON, "Flash Vpp"); - if (err) - return err; - - err = gpio_direction_output(H3XXX_EGPIO_VPP_ON, 0); - if (err) - gpio_free(H3XXX_EGPIO_VPP_ON); - - return err; -} - -static void h3xxx_flash_exit(void) -{ - gpio_free(H3XXX_EGPIO_VPP_ON); -} - -static struct flash_platform_data h3xxx_flash_data = { - .map_name = "cfi_probe", - .set_vpp = h3xxx_set_vpp, - .init = h3xxx_flash_init, - .exit = h3xxx_flash_exit, - .parts = h3xxx_partitions, - .nr_parts = ARRAY_SIZE(h3xxx_partitions), -}; - -static struct resource h3xxx_flash_resource = { - .start = SA1100_CS0_PHYS, - .end = SA1100_CS0_PHYS + SZ_32M - 1, - .flags = IORESOURCE_MEM, -}; - - -/* - * H3xxx uart support - */ -static void h3xxx_uart_set_mctrl(struct uart_port *port, u_int mctrl) -{ - if (port->mapbase == _Ser3UTCR0) { - gpio_set_value(H3XXX_GPIO_COM_RTS, !(mctrl & TIOCM_RTS)); - } -} - -static u_int h3xxx_uart_get_mctrl(struct uart_port *port) -{ - u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR; - - if (port->mapbase == _Ser3UTCR0) { - /* - * DCD and CTS bits are inverted in GPLR by RS232 transceiver - */ - if (gpio_get_value(H3XXX_GPIO_COM_DCD)) - ret &= ~TIOCM_CD; - if (gpio_get_value(H3XXX_GPIO_COM_CTS)) - ret &= ~TIOCM_CTS; - } - - return ret; -} - -static void h3xxx_uart_pm(struct uart_port *port, u_int state, u_int oldstate) -{ - if (port->mapbase == _Ser3UTCR0) - if (!gpio_request(H3XXX_EGPIO_RS232_ON, "RS232 transceiver")) { - gpio_direction_output(H3XXX_EGPIO_RS232_ON, !state); - gpio_free(H3XXX_EGPIO_RS232_ON); - } -} - -/* - * Enable/Disable wake up events for this serial port. - * Obviously, we only support this on the normal COM port. - */ -static int h3xxx_uart_set_wake(struct uart_port *port, u_int enable) -{ - int err = -EINVAL; - - if (port->mapbase == _Ser3UTCR0) { - if (enable) - PWER |= PWER_GPIO23 | PWER_GPIO25; /* DCD and CTS */ - else - PWER &= ~(PWER_GPIO23 | PWER_GPIO25); /* DCD and CTS */ - err = 0; - } - return err; -} - -static struct sa1100_port_fns h3xxx_port_fns __initdata = { - .set_mctrl = h3xxx_uart_set_mctrl, - .get_mctrl = h3xxx_uart_get_mctrl, - .pm = h3xxx_uart_pm, - .set_wake = h3xxx_uart_set_wake, -}; - -/* - * EGPIO - */ - -static struct resource egpio_resources[] = { - [0] = { - .start = H3600_EGPIO_PHYS, - .end = H3600_EGPIO_PHYS + 0x4 - 1, - .flags = IORESOURCE_MEM, - }, -}; - -static struct htc_egpio_chip egpio_chips[] = { - [0] = { - .reg_start = 0, - .gpio_base = H3XXX_EGPIO_BASE, - .num_gpios = 16, - .direction = HTC_EGPIO_OUTPUT, - .initial_values = 0x0080, /* H3XXX_EGPIO_RS232_ON */ - }, -}; - -static struct htc_egpio_platform_data egpio_info = { - .reg_width = 16, - .bus_width = 16, - .chip = egpio_chips, - .num_chips = ARRAY_SIZE(egpio_chips), -}; - -static struct platform_device h3xxx_egpio = { - .name = "htc-egpio", - .id = -1, - .resource = egpio_resources, - .num_resources = ARRAY_SIZE(egpio_resources), - .dev = { - .platform_data = &egpio_info, - }, -}; - -static struct platform_device *h3xxx_devices[] = { - &h3xxx_egpio, -}; - -static void __init h3xxx_mach_init(void) -{ - sa1100_register_uart_fns(&h3xxx_port_fns); - sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); - platform_add_devices(h3xxx_devices, ARRAY_SIZE(h3xxx_devices)); -} - -static struct map_desc h3600_io_desc[] __initdata = { - { /* static memory bank 2 CS#2 */ - .virtual = H3600_BANK_2_VIRT, - .pfn = __phys_to_pfn(SA1100_CS2_PHYS), - .length = 0x02800000, - .type = MT_DEVICE - }, { /* static memory bank 4 CS#4 */ - .virtual = H3600_BANK_4_VIRT, - .pfn = __phys_to_pfn(SA1100_CS4_PHYS), - .length = 0x00800000, - .type = MT_DEVICE - }, { /* EGPIO 0 CS#5 */ - .virtual = H3600_EGPIO_VIRT, - .pfn = __phys_to_pfn(H3600_EGPIO_PHYS), - .length = 0x01000000, - .type = MT_DEVICE - } -}; - -/* - * Common map_io initialization - */ - -static void __init h3xxx_map_io(void) -{ - sa1100_map_io(); - iotable_init(h3600_io_desc, ARRAY_SIZE(h3600_io_desc)); - - sa1100_register_uart(0, 3); /* Common serial port */ -// sa1100_register_uart(1, 1); /* Microcontroller on 3100/3600 */ - - /* Ensure those pins are outputs and driving low */ - PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; - PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); - - /* Configure suspend conditions */ - PGSR = 0; - PWER = PWER_GPIO0; - PCFR = PCFR_OPDE; - PSDR = 0; - - GPCR = 0x0fffffff; /* All outputs are set low by default */ - GPDR = 0; /* Configure all GPIOs as input */ -} - -/************************* H3100 *************************/ - -#ifdef CONFIG_SA1100_H3100 - -/* - * helper for sa1100fb - */ -static void h3100_lcd_power(int enable) -{ - if (!gpio_request(H3XXX_EGPIO_LCD_ON, "LCD ON")) { - gpio_set_value(H3100_GPIO_LCD_3V_ON, enable); - gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); - gpio_free(H3XXX_EGPIO_LCD_ON); - } -} - - -static void __init h3100_map_io(void) -{ - h3xxx_map_io(); - - sa1100fb_lcd_power = h3100_lcd_power; - - /* Older bootldrs put GPIO2-9 in alternate mode on the - assumption that they are used for video */ - GAFR &= ~0x000001fb; -} - -/* - * This turns the IRDA power on or off on the Compaq H3100 - */ -static int h3100_irda_set_power(struct device *dev, unsigned int state) -{ - gpio_set_value(H3100_GPIO_IR_ON, state); - return 0; -} - -static void h3100_irda_set_speed(struct device *dev, unsigned int speed) -{ - gpio_set_value(H3100_GPIO_IR_FSEL, !(speed < 4000000)); -} - -static struct irda_platform_data h3100_irda_data = { - .set_power = h3100_irda_set_power, - .set_speed = h3100_irda_set_speed, -}; - -static struct gpio_default_state h3100_default_gpio[] = { - { H3100_GPIO_IR_ON, GPIO_MODE_OUT0, "IrDA power" }, - { H3100_GPIO_IR_FSEL, GPIO_MODE_OUT0, "IrDA fsel" }, - { H3XXX_GPIO_COM_DCD, GPIO_MODE_IN, "COM DCD" }, - { H3XXX_GPIO_COM_CTS, GPIO_MODE_IN, "COM CTS" }, - { H3XXX_GPIO_COM_RTS, GPIO_MODE_OUT0, "COM RTS" }, - { H3100_GPIO_LCD_3V_ON, GPIO_MODE_OUT0, "LCD 3v" }, -}; - -static void __init h3100_mach_init(void) -{ - h3xxx_init_gpio(h3100_default_gpio, ARRAY_SIZE(h3100_default_gpio)); - h3xxx_mach_init(); - sa11x0_register_irda(&h3100_irda_data); -} - -MACHINE_START(H3100, "Compaq iPAQ H3100") - .phys_io = 0x80000000, - .io_pg_offst = ((0xf8000000) >> 18) & 0xfffc, - .boot_params = 0xc0000100, - .map_io = h3100_map_io, - .init_irq = sa1100_init_irq, - .timer = &sa1100_timer, - .init_machine = h3100_mach_init, -MACHINE_END - -#endif /* CONFIG_SA1100_H3100 */ - -/************************* H3600 *************************/ - -#ifdef CONFIG_SA1100_H3600 - /* * helper for sa1100fb */ @@ -483,5 +152,3 @@ MACHINE_START(H3600, "Compaq iPAQ H3600") .init_machine = h3600_mach_init, MACHINE_END -#endif /* CONFIG_SA1100_H3600 */ - diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c new file mode 100644 index 0000000..bb608d4 --- /dev/null +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -0,0 +1,292 @@ +/* + * Hardware definitions for Compaq iPAQ H3xxx Handheld Computers + * + * Copyright 2000,1 Compaq Computer Corporation. + * + * Use consistent with the GNU GPL is permitted, + * provided that this copyright notice is + * preserved in its entirety in all copies and derived works. + * + * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, + * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS + * FITNESS FOR ANY PARTICULAR PURPOSE. + * + * Author: Jamey Hicks. + * + * History: + * + * 2001-10-?? Andrew Christian Added support for iPAQ H3800 + * and abstracted EGPIO interface. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "generic.h" + +void h3xxx_init_gpio(struct gpio_default_state *s, size_t n) +{ + while (n--) { + const char *name = s->name; + int err; + + if (!name) + name = "[init]"; + err = gpio_request(s->gpio, name); + if (err) { + printk(KERN_ERR "gpio%u: unable to request: %d\n", + s->gpio, err); + continue; + } + if (s->mode >= 0) { + err = gpio_direction_output(s->gpio, s->mode); + } else { + err = gpio_direction_input(s->gpio); + } + if (err) { + printk(KERN_ERR "gpio%u: unable to set direction: %d\n", + s->gpio, err); + continue; + } + if (!s->name) + gpio_free(s->gpio); + s++; + } +} + + +/* + * H3xxx flash support + */ +static struct mtd_partition h3xxx_partitions[] = { + { + .name = "H3XXX boot firmware", + .size = 0x00040000, + .offset = 0, + .mask_flags = MTD_WRITEABLE, /* force read-only */ + }, { + .name = "H3XXX rootfs", + .size = MTDPART_SIZ_FULL, + .offset = 0x00040000, + } +}; + +static void h3xxx_set_vpp(int vpp) +{ + gpio_set_value(H3XXX_EGPIO_VPP_ON, vpp); +} + +static int h3xxx_flash_init(void) +{ + int err = gpio_request(H3XXX_EGPIO_VPP_ON, "Flash Vpp"); + if (err) + return err; + + err = gpio_direction_output(H3XXX_EGPIO_VPP_ON, 0); + if (err) + gpio_free(H3XXX_EGPIO_VPP_ON); + + return err; +} + +static void h3xxx_flash_exit(void) +{ + gpio_free(H3XXX_EGPIO_VPP_ON); +} + +static struct flash_platform_data h3xxx_flash_data = { + .map_name = "cfi_probe", + .set_vpp = h3xxx_set_vpp, + .init = h3xxx_flash_init, + .exit = h3xxx_flash_exit, + .parts = h3xxx_partitions, + .nr_parts = ARRAY_SIZE(h3xxx_partitions), +}; + +static struct resource h3xxx_flash_resource = { + .start = SA1100_CS0_PHYS, + .end = SA1100_CS0_PHYS + SZ_32M - 1, + .flags = IORESOURCE_MEM, +}; + + +/* + * H3xxx uart support + */ +static void h3xxx_uart_set_mctrl(struct uart_port *port, u_int mctrl) +{ + if (port->mapbase == _Ser3UTCR0) { + gpio_set_value(H3XXX_GPIO_COM_RTS, !(mctrl & TIOCM_RTS)); + } +} + +static u_int h3xxx_uart_get_mctrl(struct uart_port *port) +{ + u_int ret = TIOCM_CD | TIOCM_CTS | TIOCM_DSR; + + if (port->mapbase == _Ser3UTCR0) { + /* + * DCD and CTS bits are inverted in GPLR by RS232 transceiver + */ + if (gpio_get_value(H3XXX_GPIO_COM_DCD)) + ret &= ~TIOCM_CD; + if (gpio_get_value(H3XXX_GPIO_COM_CTS)) + ret &= ~TIOCM_CTS; + } + + return ret; +} + +static void h3xxx_uart_pm(struct uart_port *port, u_int state, u_int oldstate) +{ + if (port->mapbase == _Ser3UTCR0) + if (!gpio_request(H3XXX_EGPIO_RS232_ON, "RS232 transceiver")) { + gpio_direction_output(H3XXX_EGPIO_RS232_ON, !state); + gpio_free(H3XXX_EGPIO_RS232_ON); + } +} + +/* + * Enable/Disable wake up events for this serial port. + * Obviously, we only support this on the normal COM port. + */ +static int h3xxx_uart_set_wake(struct uart_port *port, u_int enable) +{ + int err = -EINVAL; + + if (port->mapbase == _Ser3UTCR0) { + if (enable) + PWER |= PWER_GPIO23 | PWER_GPIO25; /* DCD and CTS */ + else + PWER &= ~(PWER_GPIO23 | PWER_GPIO25); /* DCD and CTS */ + err = 0; + } + return err; +} + +static struct sa1100_port_fns h3xxx_port_fns __initdata = { + .set_mctrl = h3xxx_uart_set_mctrl, + .get_mctrl = h3xxx_uart_get_mctrl, + .pm = h3xxx_uart_pm, + .set_wake = h3xxx_uart_set_wake, +}; + +/* + * EGPIO + */ + +static struct resource egpio_resources[] = { + [0] = { + .start = H3600_EGPIO_PHYS, + .end = H3600_EGPIO_PHYS + 0x4 - 1, + .flags = IORESOURCE_MEM, + }, +}; + +static struct htc_egpio_chip egpio_chips[] = { + [0] = { + .reg_start = 0, + .gpio_base = H3XXX_EGPIO_BASE, + .num_gpios = 16, + .direction = HTC_EGPIO_OUTPUT, + .initial_values = 0x0080, /* H3XXX_EGPIO_RS232_ON */ + }, +}; + +static struct htc_egpio_platform_data egpio_info = { + .reg_width = 16, + .bus_width = 16, + .chip = egpio_chips, + .num_chips = ARRAY_SIZE(egpio_chips), +}; + +static struct platform_device h3xxx_egpio = { + .name = "htc-egpio", + .id = -1, + .resource = egpio_resources, + .num_resources = ARRAY_SIZE(egpio_resources), + .dev = { + .platform_data = &egpio_info, + }, +}; + +static struct platform_device *h3xxx_devices[] = { + &h3xxx_egpio, +}; + +void __init h3xxx_mach_init(void) +{ + sa1100_register_uart_fns(&h3xxx_port_fns); + sa11x0_register_mtd(&h3xxx_flash_data, &h3xxx_flash_resource, 1); + platform_add_devices(h3xxx_devices, ARRAY_SIZE(h3xxx_devices)); +} + +static struct map_desc h3600_io_desc[] __initdata = { + { /* static memory bank 2 CS#2 */ + .virtual = H3600_BANK_2_VIRT, + .pfn = __phys_to_pfn(SA1100_CS2_PHYS), + .length = 0x02800000, + .type = MT_DEVICE + }, { /* static memory bank 4 CS#4 */ + .virtual = H3600_BANK_4_VIRT, + .pfn = __phys_to_pfn(SA1100_CS4_PHYS), + .length = 0x00800000, + .type = MT_DEVICE + }, { /* EGPIO 0 CS#5 */ + .virtual = H3600_EGPIO_VIRT, + .pfn = __phys_to_pfn(H3600_EGPIO_PHYS), + .length = 0x01000000, + .type = MT_DEVICE + } +}; + +/* + * Common map_io initialization + */ + +void __init h3xxx_map_io(void) +{ + sa1100_map_io(); + iotable_init(h3600_io_desc, ARRAY_SIZE(h3600_io_desc)); + + sa1100_register_uart(0, 3); /* Common serial port */ +// sa1100_register_uart(1, 1); /* Microcontroller on 3100/3600 */ + + /* Ensure those pins are outputs and driving low */ + PPDR |= PPC_TXD4 | PPC_SCLK | PPC_SFRM; + PPSR &= ~(PPC_TXD4 | PPC_SCLK | PPC_SFRM); + + /* Configure suspend conditions */ + PGSR = 0; + PWER = PWER_GPIO0; + PCFR = PCFR_OPDE; + PSDR = 0; + + GPCR = 0x0fffffff; /* All outputs are set low by default */ + GPDR = 0; /* Configure all GPIOs as input */ +} + diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h index 7f1e130..d8a6e7c 100644 --- a/arch/arm/mach-sa1100/include/mach/h3xxx.h +++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h @@ -87,4 +87,18 @@ #define H3600_EGPIO_LCD_5V_ON (H3XXX_EGPIO_BASE + 14) /* enable 5V to LCD. active high. */ #define H3600_EGPIO_LVDD_ON (H3XXX_EGPIO_BASE + 15) /* enable 9V and -6.5V to LCD. */ +struct gpio_default_state { + int gpio; + int mode; + const char *name; +}; + +#define GPIO_MODE_IN -1 +#define GPIO_MODE_OUT0 0 +#define GPIO_MODE_OUT1 1 + +void h3xxx_init_gpio(struct gpio_default_state *s, size_t n); +void __init h3xxx_map_io(void); +void __init h3xxx_mach_init(void); + #endif /* _INCLUDE_H3XXX_H_ */ -- cgit v0.10.2 From 6e23fcb3bdee99468ff16d050b63a0c7fe103992 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:11:48 +0100 Subject: ARM: 5821/1: SA1100: h3100/h3600: revise copyright boilerplates Correct boilerplates after files split. Also shorten them a bit - use standart GPL wording (as per http://lkml.org/lkml/2007/5/1/220) and drop changelog, which only entry about h3800 support and abstracted EGPIOs is just confusing now, as both of these features are gone. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c index 4c7778e..303bcd1 100644 --- a/arch/arm/mach-sa1100/h3100.c +++ b/arch/arm/mach-sa1100/h3100.c @@ -1,24 +1,15 @@ /* - * Hardware definitions for Compaq iPAQ H3xxx Handheld Computers + * Support for Compaq iPAQ H3100 handheld computer * - * Copyright 2000,1 Compaq Computer Corporation. + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * and abstracted EGPIO interface. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * */ + #include #include #include diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 65ab286..25f189b 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -1,24 +1,15 @@ /* - * Hardware definitions for Compaq iPAQ H3xxx Handheld Computers + * Support for Compaq iPAQ H3600 handheld computer * - * Copyright 2000,1 Compaq Computer Corporation. + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * and abstracted EGPIO interface. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * */ + #include #include #include diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c index bb608d4..08bb266 100644 --- a/arch/arm/mach-sa1100/h3xxx.c +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -1,24 +1,15 @@ /* - * Hardware definitions for Compaq iPAQ H3xxx Handheld Computers + * Support for Compaq iPAQ H3100 and H3600 handheld computers (common code) * - * Copyright 2000,1 Compaq Computer Corporation. + * Copyright (c) 2000,1 Compaq Computer Corporation. (Author: Jamey Hicks) + * Copyright (c) 2009 Dmitry Artamonow * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 - * and abstracted EGPIO interface. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * */ + #include #include #include diff --git a/arch/arm/mach-sa1100/include/mach/h3xxx.h b/arch/arm/mach-sa1100/include/mach/h3xxx.h index d8a6e7c..7d9df16 100644 --- a/arch/arm/mach-sa1100/include/mach/h3xxx.h +++ b/arch/arm/mach-sa1100/include/mach/h3xxx.h @@ -1,22 +1,12 @@ /* + * Definitions for Compaq iPAQ H3100 and H3600 handheld computers * - * Definitions for H3600 Handheld Computer + * (c) 2000 Compaq Computer Corporation. (Author: Jamey Hicks) + * (c) 2009 Dmitry Artamonow * - * Copyright 2000 Compaq Computer Corporation. - * - * Use consistent with the GNU GPL is permitted, - * provided that this copyright notice is - * preserved in its entirety in all copies and derived works. - * - * COMPAQ COMPUTER CORPORATION MAKES NO WARRANTIES, EXPRESSED OR IMPLIED, - * AS TO THE USEFULNESS OR CORRECTNESS OF THIS CODE OR ITS - * FITNESS FOR ANY PARTICULAR PURPOSE. - * - * Author: Jamey Hicks. - * - * History: - * - * 2001-10-?? Andrew Christian Added support for iPAQ H3800 + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * */ -- cgit v0.10.2 From 4aa9755580650c7135c154bd4276411739e8fb60 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:12:25 +0100 Subject: ARM: 5822/1: SA1100: h3100/h3600: clean up #includes After a code reorganization and following split, there's some #includes now unused. Clean them up and sort remaining alphabetticaly where possible. Compile tested. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c index 303bcd1..793e68c 100644 --- a/arch/arm/mach-sa1100/h3100.c +++ b/arch/arm/mach-sa1100/h3100.c @@ -10,30 +10,13 @@ * */ -#include #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include - -#include #include -#include #include -#include -#include #include diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 25f189b..47587cc 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -10,30 +10,13 @@ * */ -#include #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include -#include - -#include #include -#include #include -#include -#include #include diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c index 08bb266..c054c64 100644 --- a/arch/arm/mach-sa1100/h3xxx.c +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -10,28 +10,15 @@ * */ -#include -#include #include -#include -#include -#include +#include #include #include #include -#include -#include #include +#include -#include -#include -#include -#include - -#include -#include #include -#include #include #include -- cgit v0.10.2 From c463eb62ac5bd8c26578cae42e1d5bb397aed100 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:13:01 +0100 Subject: ARM: 5823/1: SA1100: h3100/h3600: add support for gpio-keys Add support for "Power" and "Action" (joystick center) buttons - the only buttons on iPaq h3100/h3600 connected to GPIOs (other buttons are controlled by microcontroller) Also remove setting PWER for wakeup on Power button press - gpio-keys driver will handle it. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c index c054c64..a7a1982 100644 --- a/arch/arm/mach-sa1100/h3xxx.c +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -12,6 +12,8 @@ #include #include +#include +#include #include #include #include @@ -212,8 +214,44 @@ static struct platform_device h3xxx_egpio = { }, }; +/* + * GPIO keys + */ + +static struct gpio_keys_button h3xxx_button_table[] = { + { + .code = KEY_POWER, + .gpio = H3XXX_GPIO_PWR_BUTTON, + .desc = "Power Button", + .active_low = 1, + .type = EV_KEY, + .wakeup = 1, + }, { + .code = KEY_ENTER, + .gpio = H3XXX_GPIO_ACTION_BUTTON, + .active_low = 1, + .desc = "Action button", + .type = EV_KEY, + .wakeup = 0, + }, +}; + +static struct gpio_keys_platform_data h3xxx_keys_data = { + .buttons = h3xxx_button_table, + .nbuttons = ARRAY_SIZE(h3xxx_button_table), +}; + +static struct platform_device h3xxx_keys = { + .name = "gpio-keys", + .id = -1, + .dev = { + .platform_data = &h3xxx_keys_data, + }, +}; + static struct platform_device *h3xxx_devices[] = { &h3xxx_egpio, + &h3xxx_keys, }; void __init h3xxx_mach_init(void) @@ -260,7 +298,6 @@ void __init h3xxx_map_io(void) /* Configure suspend conditions */ PGSR = 0; - PWER = PWER_GPIO0; PCFR = PCFR_OPDE; PSDR = 0; -- cgit v0.10.2 From e7435f866f86a9a1843dcdc2945f833b26761786 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:13:47 +0100 Subject: ARM: 5824/1: SA1100: reuse h3600 PCMCIA driver on h3100 Both iPAQs h3600 and h3100 share the same control GPIOs for PCMCIA, so driver can be reused. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/drivers/pcmcia/sa1100_generic.c b/drivers/pcmcia/sa1100_generic.c index 11cc3ba..8db86b9 100644 --- a/drivers/pcmcia/sa1100_generic.c +++ b/drivers/pcmcia/sa1100_generic.c @@ -51,7 +51,7 @@ static int (*sa11x0_pcmcia_hw_init[])(struct device *dev) = { #ifdef CONFIG_SA1100_CERF pcmcia_cerf_init, #endif -#ifdef CONFIG_SA1100_H3600 +#if defined(CONFIG_SA1100_H3100) || defined(CONFIG_SA1100_H3600) pcmcia_h3600_init, #endif #ifdef CONFIG_SA1100_SHANNON diff --git a/drivers/pcmcia/sa1100_h3600.c b/drivers/pcmcia/sa1100_h3600.c index 8706d42..56329ad 100644 --- a/drivers/pcmcia/sa1100_h3600.c +++ b/drivers/pcmcia/sa1100_h3600.c @@ -223,7 +223,7 @@ int __init pcmcia_h3600_init(struct device *dev) { int ret = -ENODEV; - if (machine_is_h3600()) + if (machine_is_h3600() || machine_is_h3100()) ret = sa11xx_drv_pcmcia_probe(dev, &h3600_pcmcia_ops, 0, 2); return ret; -- cgit v0.10.2 From ebdb56409d4a65571ca3933b056c5c9e20bb34f5 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:15:01 +0100 Subject: ARM: 5825/1: SA1100: h3600: update defconfig Update defconfig to current kernel, enable support for iPAQ H3100 and following drivers: gpio-keys, htc-egpio, ide_cs. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig index f6aed77..efa78e1 100644 --- a/arch/arm/configs/h3600_defconfig +++ b/arch/arm/configs/h3600_defconfig @@ -1,86 +1,189 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc4 -# Thu Jun 9 01:59:03 2005 +# Linux kernel version: 2.6.32-rc5 +# Sat Oct 24 00:09:30 2009 # CONFIG_ARM=y -CONFIG_MMU=y -CONFIG_UID16=y +CONFIG_SYS_SUPPORTS_APM_EMULATION=y +CONFIG_GENERIC_GPIO=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_TRACE_IRQFLAGS_SUPPORT=y +CONFIG_HARDIRQS_SW_RESEND=y +CONFIG_GENERIC_IRQ_PROBE=y CONFIG_RWSEM_GENERIC_SPINLOCK=y +CONFIG_ARCH_HAS_CPUFREQ=y +CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_GENERIC_IOMAP=y +CONFIG_ARCH_MTD_XIP=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y +CONFIG_VECTORS_BASE=0xffff0000 +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_CONSTRUCTORS=y # -# Code maturity level options +# General setup # CONFIG_EXPERIMENTAL=y -CONFIG_CLEAN_COMPILE=y CONFIG_BROKEN_ON_SMP=y CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y CONFIG_SWAP=y CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y # CONFIG_POSIX_MQUEUE is not set # CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y +# CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set -CONFIG_HOTPLUG=y -CONFIG_KOBJECT_UEVENT=y + +# +# RCU Subsystem +# +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=32 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_TREE_RCU_TRACE is not set # CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_GROUP_SCHED is not set +# CONFIG_CGROUPS is not set +# CONFIG_SYSFS_DEPRECATED_V2 is not set +# CONFIG_RELAY is not set +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_IPC_NS is not set +# CONFIG_USER_NS is not set +# CONFIG_PID_NS is not set +# CONFIG_NET_NS is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +CONFIG_RD_BZIP2=y +CONFIG_RD_LZMA=y +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y # CONFIG_EMBEDDED is not set +CONFIG_UID16=y +CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_HOTPLUG=y CONFIG_PRINTK=y CONFIG_BUG=y +CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_EPOLL=y -CONFIG_CC_OPTIMIZE_FOR_SIZE=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 +CONFIG_AIO=y + +# +# Kernel Performance Events And Counters +# +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLUB_DEBUG=y +CONFIG_COMPAT_BRK=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +# CONFIG_PROFILING is not set +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_CLK=y # -# Loadable module support +# GCOV-based kernel profiling # +# CONFIG_SLOW_WORK is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set # CONFIG_MODULE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y # CONFIG_MODVERSIONS is not set # CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set +CONFIG_BLOCK=y +# CONFIG_LBDAF is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +# CONFIG_IOSCHED_AS is not set +# CONFIG_IOSCHED_DEADLINE is not set +# CONFIG_IOSCHED_CFQ is not set +# CONFIG_DEFAULT_AS is not set +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_NOOP=y +CONFIG_DEFAULT_IOSCHED="noop" +CONFIG_FREEZER=y # # System Type # -# CONFIG_ARCH_CLPS7500 is not set +CONFIG_MMU=y +# CONFIG_ARCH_AAEC2000 is not set +# CONFIG_ARCH_INTEGRATOR is not set +# CONFIG_ARCH_REALVIEW is not set +# CONFIG_ARCH_VERSATILE is not set +# CONFIG_ARCH_AT91 is not set # CONFIG_ARCH_CLPS711X is not set -# CONFIG_ARCH_CO285 is not set +# CONFIG_ARCH_GEMINI is not set # CONFIG_ARCH_EBSA110 is not set +# CONFIG_ARCH_EP93XX is not set # CONFIG_ARCH_FOOTBRIDGE is not set -# CONFIG_ARCH_INTEGRATOR is not set -# CONFIG_ARCH_IOP3XX is not set -# CONFIG_ARCH_IXP4XX is not set +# CONFIG_ARCH_MXC is not set +# CONFIG_ARCH_STMP3XXX is not set +# CONFIG_ARCH_NETX is not set +# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_NOMADIK is not set +# CONFIG_ARCH_IOP13XX is not set +# CONFIG_ARCH_IOP32X is not set +# CONFIG_ARCH_IOP33X is not set +# CONFIG_ARCH_IXP23XX is not set # CONFIG_ARCH_IXP2000 is not set +# CONFIG_ARCH_IXP4XX is not set # CONFIG_ARCH_L7200 is not set +# CONFIG_ARCH_KIRKWOOD is not set +# CONFIG_ARCH_LOKI is not set +# CONFIG_ARCH_MV78XX0 is not set +# CONFIG_ARCH_ORION5X is not set +# CONFIG_ARCH_MMP is not set +# CONFIG_ARCH_KS8695 is not set +# CONFIG_ARCH_NS9XXX is not set +# CONFIG_ARCH_W90X900 is not set +# CONFIG_ARCH_PNX4008 is not set # CONFIG_ARCH_PXA is not set +# CONFIG_ARCH_MSM is not set # CONFIG_ARCH_RPC is not set CONFIG_ARCH_SA1100=y # CONFIG_ARCH_S3C2410 is not set +# CONFIG_ARCH_S3C64XX is not set +# CONFIG_ARCH_S5PC1XX is not set # CONFIG_ARCH_SHARK is not set # CONFIG_ARCH_LH7A40X is not set +# CONFIG_ARCH_U300 is not set +# CONFIG_ARCH_DAVINCI is not set # CONFIG_ARCH_OMAP is not set -# CONFIG_ARCH_VERSATILE is not set -# CONFIG_ARCH_IMX is not set -# CONFIG_ARCH_H720X is not set +# CONFIG_ARCH_BCMRING is not set # # SA11x0 Implementations @@ -106,27 +209,31 @@ CONFIG_CPU_32=y CONFIG_CPU_SA1100=y CONFIG_CPU_32v4=y CONFIG_CPU_ABRT_EV4=y +CONFIG_CPU_PABRT_LEGACY=y CONFIG_CPU_CACHE_V4WB=y CONFIG_CPU_CACHE_VIVT=y CONFIG_CPU_TLB_V4WB=y -CONFIG_CPU_MINICACHE=y +CONFIG_CPU_CP15=y +CONFIG_CPU_CP15_MMU=y # # Processor Features # +# CONFIG_CPU_ICACHE_DISABLE is not set +# CONFIG_CPU_DCACHE_DISABLE is not set +CONFIG_ARM_L1_CACHE_SHIFT=5 # # Bus support # CONFIG_ISA=y -CONFIG_ISA_DMA_API=y - -# -# PCCARD (PCMCIA/CardBus) support -# +# CONFIG_PCI_SYSCALL is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set CONFIG_PCCARD=y # CONFIG_PCMCIA_DEBUG is not set CONFIG_PCMCIA=y +CONFIG_PCMCIA_LOAD_CIS=y +CONFIG_PCMCIA_IOCTL=y # # PC-card bridges @@ -138,11 +245,41 @@ CONFIG_PCMCIA_SA1100=y # # Kernel Features # -# CONFIG_SMP is not set +CONFIG_TICK_ONESHOT=y +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_VMSPLIT_3G=y +# CONFIG_VMSPLIT_2G is not set +# CONFIG_VMSPLIT_1G is not set +CONFIG_PAGE_OFFSET=0xC0000000 +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set -CONFIG_DISCONTIGMEM=y +CONFIG_HZ=100 +# CONFIG_AEABI is not set +CONFIG_ARCH_SPARSEMEM_ENABLE=y +CONFIG_ARCH_SPARSEMEM_DEFAULT=y +# CONFIG_ARCH_SELECT_MEMORY_MODEL is not set +# CONFIG_HIGHMEM is not set +CONFIG_SELECT_MEMORY_MODEL=y +# CONFIG_FLATMEM_MANUAL is not set +# CONFIG_DISCONTIGMEM_MANUAL is not set +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_SPARSEMEM=y +CONFIG_HAVE_MEMORY_PRESENT=y +CONFIG_SPARSEMEM_EXTREME=y +CONFIG_SPLIT_PTLOCK_CPUS=4096 +# CONFIG_PHYS_ADDR_T_64BIT is not set +CONFIG_ZONE_DMA_FLAG=0 +CONFIG_VIRT_TO_BUS=y +CONFIG_HAVE_MLOCK=y +CONFIG_HAVE_MLOCKED_PAGE_BIT=y +# CONFIG_KSM is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 # CONFIG_LEDS is not set CONFIG_ALIGNMENT_TRAP=y +# CONFIG_UACCESS_WITH_MEMCPY is not set # # Boot options @@ -151,22 +288,26 @@ CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_CMDLINE="" # CONFIG_XIP_KERNEL is not set +# CONFIG_KEXEC is not set # -# CPU Frequency scaling +# CPU Power Management # CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_TABLE=y # CONFIG_CPU_FREQ_DEBUG is not set -CONFIG_CPU_FREQ_STAT=y -# CONFIG_CPU_FREQ_STAT_DETAILS is not set +# CONFIG_CPU_FREQ_STAT is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y +# CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set # CONFIG_CPU_FREQ_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set CONFIG_CPU_FREQ_GOV_USERSPACE=y # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set +# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_SA1100=y +# CONFIG_CPU_IDLE is not set # # Floating point emulation @@ -183,6 +324,8 @@ CONFIG_FPE_NWFPE=y # Userspace binary formats # CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_HAVE_AOUT=y # CONFIG_BINFMT_AOUT is not set # CONFIG_BINFMT_MISC is not set # CONFIG_ARTHUR is not set @@ -191,8 +334,120 @@ CONFIG_BINFMT_ELF=y # Power management options # CONFIG_PM=y -# CONFIG_PM_LEGACY is not set -# CONFIG_APM is not set +# CONFIG_PM_DEBUG is not set +CONFIG_PM_SLEEP=y +CONFIG_SUSPEND=y +CONFIG_SUSPEND_FREEZER=y +# CONFIG_APM_EMULATION is not set +# CONFIG_PM_RUNTIME is not set +CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_NET=y + +# +# Networking options +# +# CONFIG_PACKET is not set +CONFIG_UNIX=y +CONFIG_XFRM=y +# CONFIG_XFRM_USER is not set +# CONFIG_XFRM_SUB_POLICY is not set +# CONFIG_XFRM_MIGRATE is not set +# CONFIG_XFRM_STATISTICS is not set +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_INET_XFRM_MODE_TRANSPORT=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_INET_XFRM_MODE_BEET=y +CONFIG_INET_LRO=y +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +# CONFIG_IPV6 is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_RDS is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +CONFIG_IRDA=m + +# +# IrDA protocols +# +CONFIG_IRLAN=m +CONFIG_IRNET=m +CONFIG_IRCOMM=m +# CONFIG_IRDA_ULTRA is not set + +# +# IrDA options +# +# CONFIG_IRDA_CACHE_LAST_LSAP is not set +# CONFIG_IRDA_FAST_RR is not set +# CONFIG_IRDA_DEBUG is not set + +# +# Infrared-port device drivers +# + +# +# SIR device drivers +# +# CONFIG_IRTTY_SIR is not set + +# +# Dongle support +# + +# +# FIR device drivers +# +CONFIG_SA1100_FIR=m +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set # # Device Drivers @@ -201,15 +456,17 @@ CONFIG_PM=y # # Generic Driver Options # +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set - -# -# Memory Technology Devices (MTD) -# +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_SYS_HYPERVISOR is not set +# CONFIG_CONNECTOR is not set CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set +# CONFIG_MTD_TESTS is not set # CONFIG_MTD_CONCAT is not set CONFIG_MTD_PARTITIONS=y CONFIG_MTD_REDBOOT_PARTS=y @@ -218,15 +475,20 @@ CONFIG_MTD_REDBOOT_DIRECTORY_BLOCK=-1 # CONFIG_MTD_REDBOOT_PARTS_READONLY is not set # CONFIG_MTD_CMDLINE_PARTS is not set # CONFIG_MTD_AFS_PARTS is not set +# CONFIG_MTD_AR7_PARTS is not set # # User Modules And Translation Layers # CONFIG_MTD_CHAR=y +CONFIG_MTD_BLKDEVS=y CONFIG_MTD_BLOCK=y # CONFIG_FTL is not set # CONFIG_NFTL is not set # CONFIG_INFTL is not set +# CONFIG_RFD_FTL is not set +# CONFIG_SSFDC is not set +# CONFIG_MTD_OOPS is not set # # RAM/ROM/Flash chip drivers @@ -249,6 +511,7 @@ CONFIG_MTD_MAP_BANK_WIDTH_4=y CONFIG_MTD_CFI_I2=y # CONFIG_MTD_CFI_I4 is not set # CONFIG_MTD_CFI_I8 is not set +# CONFIG_MTD_OTP is not set CONFIG_MTD_CFI_INTELEXT=y # CONFIG_MTD_CFI_AMDSTD is not set # CONFIG_MTD_CFI_STAA is not set @@ -265,7 +528,7 @@ CONFIG_MTD_CFI_UTIL=y # CONFIG_MTD_PHYSMAP is not set # CONFIG_MTD_ARM_INTEGRATOR is not set CONFIG_MTD_SA1100=y -# CONFIG_MTD_EDB7312 is not set +# CONFIG_MTD_PLATRAM is not set # # Self-contained MTD device drivers @@ -273,7 +536,6 @@ CONFIG_MTD_SA1100=y # CONFIG_MTD_SLRAM is not set # CONFIG_MTD_PHRAM is not set # CONFIG_MTD_MTDRAM is not set -# CONFIG_MTD_BLKMTD is not set # CONFIG_MTD_BLOCK2MTD is not set # @@ -282,26 +544,21 @@ CONFIG_MTD_SA1100=y # CONFIG_MTD_DOC2000 is not set # CONFIG_MTD_DOC2001 is not set # CONFIG_MTD_DOC2001PLUS is not set - -# -# NAND Flash Device Drivers -# # CONFIG_MTD_NAND is not set +# CONFIG_MTD_ONENAND is not set # -# Parallel port support +# LPDDR flash memory drivers # -# CONFIG_PARPORT is not set +# CONFIG_MTD_LPDDR is not set # -# Plug and Play support +# UBI - Unsorted block images # +# CONFIG_MTD_UBI is not set +# CONFIG_PARPORT is not set # CONFIG_PNP is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_XD is not set +CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m # CONFIG_BLK_DEV_CRYPTOLOOP is not set @@ -309,212 +566,58 @@ CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=16 CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" +# CONFIG_BLK_DEV_XIP is not set # CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y # CONFIG_ATA_OVER_ETH is not set +# CONFIG_MG_DISK is not set +# CONFIG_MISC_DEVICES is not set +CONFIG_HAVE_IDE=y +CONFIG_IDE=y # -# ATA/ATAPI/MFM/RLL support -# -CONFIG_IDE=m -CONFIG_BLK_DEV_IDE=m - -# -# Please see Documentation/ide.txt for help/info on IDE drives +# Please see Documentation/ide/ide.txt for help/info on IDE drives # # CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=m -# CONFIG_IDEDISK_MULTI_MODE is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m +CONFIG_IDE_GD=y +CONFIG_IDE_GD_ATA=y +# CONFIG_IDE_GD_ATAPI is not set +CONFIG_BLK_DEV_IDECS=y +# CONFIG_BLK_DEV_IDECD is not set # CONFIG_BLK_DEV_IDETAPE is not set -# CONFIG_BLK_DEV_IDEFLOPPY is not set # CONFIG_IDE_TASK_IOCTL is not set +CONFIG_IDE_PROC_FS=y # # IDE chipset support/bugfixes # -CONFIG_IDE_GENERIC=m -# CONFIG_IDE_ARM is not set -# CONFIG_IDE_CHIPSETS is not set +# CONFIG_BLK_DEV_PLATFORM is not set # CONFIG_BLK_DEV_IDEDMA is not set -# CONFIG_IDEDMA_AUTO is not set -# CONFIG_BLK_DEV_HD is not set # # SCSI device support # +# CONFIG_RAID_ATTRS is not set # CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set # CONFIG_MD is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support -# - -# -# I2O device support -# - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -# CONFIG_PACKET is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -# CONFIG_IP_TCPDIAG is not set -# CONFIG_IP_TCPDIAG_IPV6 is not set -# CONFIG_IPV6 is not set -# CONFIG_NETFILTER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -CONFIG_IRDA=m - -# -# IrDA protocols -# -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -# CONFIG_IRDA_ULTRA is not set - -# -# IrDA options -# -# CONFIG_IRDA_CACHE_LAST_LSAP is not set -# CONFIG_IRDA_FAST_RR is not set -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# - -# -# SIR device drivers -# -# CONFIG_IRTTY_SIR is not set - -# -# Dongle support -# - -# -# Old SIR device drivers -# -# CONFIG_IRPORT_SIR is not set - -# -# Old Serial dongle support -# - -# -# FIR device drivers -# -# CONFIG_NSC_FIR is not set -# CONFIG_WINBOND_FIR is not set -# CONFIG_SMC_IRCC_FIR is not set -# CONFIG_ALI_FIR is not set -CONFIG_SA1100_FIR=m -# CONFIG_VIA_FIR is not set -# CONFIG_BT is not set CONFIG_NETDEVICES=y # CONFIG_DUMMY is not set # CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set # CONFIG_EQUALIZER is not set # CONFIG_TUN is not set - -# -# ARCnet devices -# +# CONFIG_VETH is not set # CONFIG_ARCNET is not set - -# -# Ethernet (10 or 100Mbit) -# # CONFIG_NET_ETHERNET is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# +# CONFIG_NETDEV_1000 is not set +# CONFIG_NETDEV_10000 is not set # CONFIG_TR is not set +# CONFIG_WLAN is not set # -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# PCMCIA network device support +# Enable WiMAX (Networking options) to see the WiMAX drivers # CONFIG_NET_PCMCIA=y # CONFIG_PCMCIA_3C589 is not set @@ -525,10 +628,6 @@ CONFIG_PCMCIA_PCNET=y # CONFIG_PCMCIA_SMC91C92 is not set # CONFIG_PCMCIA_XIRC2PS is not set # CONFIG_PCMCIA_AXNET is not set - -# -# Wan interfaces -# # CONFIG_WAN is not set CONFIG_PPP=m # CONFIG_PPP_MULTILINK is not set @@ -537,20 +636,23 @@ CONFIG_PPP_ASYNC=m # CONFIG_PPP_SYNC_TTY is not set CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +# CONFIG_PPP_MPPE is not set # CONFIG_PPPOE is not set +# CONFIG_PPPOL2TP is not set # CONFIG_SLIP is not set -# CONFIG_SHAPER is not set +CONFIG_SLHC=m # CONFIG_NETCONSOLE is not set - -# -# ISDN subsystem -# +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set # CONFIG_ISDN is not set +# CONFIG_PHONE is not set # # Input device support # CONFIG_INPUT=y +# CONFIG_INPUT_FF_MEMLESS is not set +# CONFIG_INPUT_POLLDEV is not set # # Userland interfaces @@ -560,7 +662,6 @@ CONFIG_INPUT_MOUSEDEV_PSAUX=y CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set # CONFIG_INPUT_EVDEV is not set # CONFIG_INPUT_EVBUG is not set @@ -568,47 +669,42 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 # Input Device Drivers # CONFIG_INPUT_KEYBOARD=y -CONFIG_KEYBOARD_ATKBD=y -# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_ATKBD is not set # CONFIG_KEYBOARD_LKKBD is not set -# CONFIG_KEYBOARD_XTKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_KEYBOARD_MATRIX is not set # CONFIG_KEYBOARD_NEWTON is not set -CONFIG_INPUT_MOUSE=y -CONFIG_MOUSE_PS2=y -# CONFIG_MOUSE_SERIAL is not set -# CONFIG_MOUSE_INPORT is not set -# CONFIG_MOUSE_LOGIBM is not set -# CONFIG_MOUSE_PC110PAD is not set -# CONFIG_MOUSE_VSXXXAA is not set +# CONFIG_KEYBOARD_OPENCORES is not set +# CONFIG_KEYBOARD_STOWAWAY is not set +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set # # Hardware I/O ports # -CONFIG_SERIO=y -CONFIG_SERIO_SERPORT=y -CONFIG_SERIO_LIBPS2=y -# CONFIG_SERIO_RAW is not set +# CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y # # Character devices # CONFIG_VT=y +CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_HW_CONSOLE=y +# CONFIG_VT_HW_CONSOLE_BINDING is not set +CONFIG_DEVKMEM=y # CONFIG_SERIAL_NONSTANDARD is not set # # Serial drivers # -CONFIG_SERIAL_8250=m -# CONFIG_SERIAL_8250_CS is not set -CONFIG_SERIAL_8250_NR_UARTS=4 -# CONFIG_SERIAL_8250_EXTENDED is not set +# CONFIG_SERIAL_8250 is not set # # Non-8250 serial port support @@ -618,71 +714,125 @@ CONFIG_SERIAL_SA1100_CONSOLE=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set # -# IPMI +# PCMCIA character devices # -# CONFIG_IPMI_HANDLER is not set +# CONFIG_SYNCLINK_CS is not set +# CONFIG_CARDMAN_4000 is not set +# CONFIG_CARDMAN_4040 is not set +# CONFIG_IPWIRELESS is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +CONFIG_DEVPORT=y +# CONFIG_I2C is not set +# CONFIG_SPI is not set # -# Watchdog Cards +# PPS support # -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -# CONFIG_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set +# CONFIG_PPS is not set +CONFIG_ARCH_REQUIRE_GPIOLIB=y +CONFIG_GPIOLIB=y +# CONFIG_GPIO_SYSFS is not set # -# Ftape, the floppy tape device driver +# Memory mapped GPIO expanders: # -# CONFIG_DRM is not set # -# PCMCIA character devices +# I2C GPIO expanders: # -# CONFIG_SYNCLINK_CS is not set -# CONFIG_RAW_DRIVER is not set # -# TPM devices +# PCI GPIO expanders: # # -# I2C support +# SPI GPIO expanders: # -# CONFIG_I2C is not set # -# Misc devices +# AC97 GPIO expanders: # +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set +# CONFIG_THERMAL is not set +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # -# Multimedia devices +# Sonics Silicon Backplane # -# CONFIG_VIDEO_DEV is not set +# CONFIG_SSB is not set # -# Digital Video Broadcasting Devices +# Multifunction device drivers # -# CONFIG_DVB is not set +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_MFD_ASIC3 is not set +CONFIG_HTC_EGPIO=y +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_MFD_T7L66XB is not set +# CONFIG_MFD_TC6387XB is not set +# CONFIG_MFD_TC6393XB is not set + +# +# Multimedia Capabilities Port drivers +# +# CONFIG_MCP_SA11X0 is not set +# CONFIG_REGULATOR is not set +# CONFIG_MEDIA_SUPPORT is not set # # Graphics support # +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set CONFIG_FB=y +# CONFIG_FIRMWARE_EDID is not set +# CONFIG_FB_DDC is not set +# CONFIG_FB_BOOT_VESA_SUPPORT is not set CONFIG_FB_CFB_FILLRECT=y CONFIG_FB_CFB_COPYAREA=y CONFIG_FB_CFB_IMAGEBLIT=y -CONFIG_FB_SOFT_CURSOR=y +# CONFIG_FB_CFB_REV_PIXELS_IN_BYTE is not set +# CONFIG_FB_SYS_FILLRECT is not set +# CONFIG_FB_SYS_COPYAREA is not set +# CONFIG_FB_SYS_IMAGEBLIT is not set +# CONFIG_FB_FOREIGN_ENDIAN is not set +# CONFIG_FB_SYS_FOPS is not set +# CONFIG_FB_SVGALIB is not set # CONFIG_FB_MACMODES is not set +# CONFIG_FB_BACKLIGHT is not set # CONFIG_FB_MODE_HELPERS is not set # CONFIG_FB_TILEBLITTING is not set + +# +# Frame buffer hardware drivers +# CONFIG_FB_SA1100=y # CONFIG_FB_S1D13XXX is not set # CONFIG_FB_VIRTUAL is not set +# CONFIG_FB_METRONOME is not set +# CONFIG_FB_MB862XX is not set +# CONFIG_FB_BROADSHEET is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set # # Console display driver support @@ -691,65 +841,54 @@ CONFIG_FB_SA1100=y # CONFIG_MDA_CONSOLE is not set CONFIG_DUMMY_CONSOLE=y # CONFIG_FRAMEBUFFER_CONSOLE is not set - -# -# Logo configuration -# # CONFIG_LOGO is not set -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set - -# -# Sound -# -CONFIG_SOUND=y - -# -# Advanced Linux Sound Architecture -# -# CONFIG_SND is not set - -# -# Open Sound System -# -# CONFIG_SOUND_PRIME is not set - -# -# USB support -# -CONFIG_USB_ARCH_HAS_HCD=y -# CONFIG_USB_ARCH_HAS_OHCI is not set -# CONFIG_USB is not set +# CONFIG_SOUND is not set +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set +CONFIG_RTC_LIB=y +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set +# CONFIG_UIO is not set # -# USB Gadget Support +# TI VLYNQ # -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set +# CONFIG_STAGING is not set # # File systems # CONFIG_EXT2_FS=y # CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set # CONFIG_EXT3_FS is not set -# CONFIG_JBD is not set +# CONFIG_EXT4_FS is not set # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set - -# -# XFS support -# +# CONFIG_FS_POSIX_ACL is not set # CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y +# CONFIG_INOTIFY is not set +CONFIG_INOTIFY_USER=y +# CONFIG_QUOTA is not set # CONFIG_AUTOFS_FS is not set # CONFIG_AUTOFS4_FS is not set +# CONFIG_FUSE_FS is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set # # CD-ROM/DVD Filesystems @@ -771,16 +910,13 @@ CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" # Pseudo filesystems # CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVPTS_FS_XATTR is not set # CONFIG_TMPFS is not set # CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_HFS_FS is not set @@ -788,34 +924,37 @@ CONFIG_RAMFS=y # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set CONFIG_JFFS2_FS=y CONFIG_JFFS2_FS_DEBUG=0 -# CONFIG_JFFS2_FS_NAND is not set -# CONFIG_JFFS2_FS_NOR_ECC is not set +CONFIG_JFFS2_FS_WRITEBUFFER=y +# CONFIG_JFFS2_FS_WBUF_VERIFY is not set +# CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set # CONFIG_JFFS2_COMPRESSION_OPTIONS is not set CONFIG_JFFS2_ZLIB=y +# CONFIG_JFFS2_LZO is not set CONFIG_JFFS2_RTIME=y # CONFIG_JFFS2_RUBIN is not set CONFIG_CRAMFS=m +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set - -# -# Network File Systems -# +CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V3 is not set # CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set CONFIG_NFSD=m # CONFIG_NFSD_V3 is not set -CONFIG_NFSD_TCP=y +# CONFIG_NFSD_V4 is not set CONFIG_LOCKD=y CONFIG_EXPORTFS=m +CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set @@ -831,10 +970,6 @@ CONFIG_SMB_FS=m # # CONFIG_PARTITION_ADVANCED is not set CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# CONFIG_NLS=y CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_CODEPAGE_437 is not set @@ -875,20 +1010,34 @@ CONFIG_NLS_DEFAULT="iso8859-1" # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_UTF8 is not set - -# -# Profiling support -# -# CONFIG_PROFILING is not set +# CONFIG_DLM is not set # # Kernel hacking # # CONFIG_PRINTK_TIME is not set +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +# CONFIG_MAGIC_SYSRQ is not set +# CONFIG_STRIP_ASM_SYMS is not set +# CONFIG_UNUSED_SYMBOLS is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_HEADERS_CHECK is not set # CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_MEMORY_INIT=y CONFIG_FRAME_POINTER=y +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_LATENCYTOP is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_TRACING_SUPPORT=y +# CONFIG_FTRACE is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y # CONFIG_DEBUG_USER is not set # @@ -896,21 +1045,120 @@ CONFIG_FRAME_POINTER=y # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=m +CONFIG_CRYPTO_ALGAPI2=m +CONFIG_CRYPTO_RNG=m +CONFIG_CRYPTO_RNG2=m +# CONFIG_CRYPTO_MANAGER is not set +# CONFIG_CRYPTO_MANAGER2 is not set +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +# CONFIG_CRYPTO_CBC is not set +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +# CONFIG_CRYPTO_ECB is not set +# CONFIG_CRYPTO_LRW is not set +# CONFIG_CRYPTO_PCBC is not set +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set +# CONFIG_CRYPTO_VMAC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_GHASH is not set +# CONFIG_CRYPTO_MD4 is not set +# CONFIG_CRYPTO_MD5 is not set +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +CONFIG_CRYPTO_AES=m +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +# CONFIG_CRYPTO_DES is not set +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set # -# Cryptographic options +# Compression # -# CONFIG_CRYPTO is not set +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_ZLIB is not set +# CONFIG_CRYPTO_LZO is not set # -# Hardware crypto devices +# Random Number Generation # +CONFIG_CRYPTO_ANSI_CPRNG=m +CONFIG_CRYPTO_HW=y +# CONFIG_BINARY_PRINTF is not set # # Library routines # +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC_CCITT=m +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y +# CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y +CONFIG_DECOMPRESS_GZIP=y +CONFIG_DECOMPRESS_BZIP2=y +CONFIG_DECOMPRESS_LZMA=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_NLATTR=y -- cgit v0.10.2 From a53c876dc111bb7507ecef3962b9cc17eb43364b Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:18:43 +0100 Subject: ARM: 5826/1: SA1100: h3100/h3600: always build htc-egpio driver Many features of h3100/h3600 (LCD, PCMCIA, Flash write, etc.) depend on correct functioning of GPIO expander handled by htc-egpio driver, so force its building in Kconfig. Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig index 854768fa..03a7f38 100644 --- a/arch/arm/mach-sa1100/Kconfig +++ b/arch/arm/mach-sa1100/Kconfig @@ -53,6 +53,7 @@ config SA1100_COLLIE config SA1100_H3100 bool "Compaq iPAQ H3100" + select HTC_EGPIO help Say Y here if you intend to run this kernel on the Compaq iPAQ H3100 handheld computer. Information about this machine and the @@ -62,6 +63,7 @@ config SA1100_H3100 config SA1100_H3600 bool "Compaq iPAQ H3600/H3700" + select HTC_EGPIO help Say Y here if you intend to run this kernel on the Compaq iPAQ H3600 handheld computer. Information about this machine and the -- cgit v0.10.2 From 729fae44dd8332cbf5b9d05a9af17a7455e165b2 Mon Sep 17 00:00:00 2001 From: Dmitry Artamonow Date: Fri, 27 Nov 2009 12:22:32 +0100 Subject: ARM: 5827/1: SA1100: h3100/h3600: emit messages on failed gpio_request Signed-off-by: Dmitry Artamonow Signed-off-by: Russell King diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c index 793e68c..0c7cea0 100644 --- a/arch/arm/mach-sa1100/h3100.c +++ b/arch/arm/mach-sa1100/h3100.c @@ -31,6 +31,8 @@ static void h3100_lcd_power(int enable) gpio_set_value(H3100_GPIO_LCD_3V_ON, enable); gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); gpio_free(H3XXX_EGPIO_LCD_ON); + } else { + pr_err("%s: can't request H3XXX_EGPIO_LCD_ON\n", __func__); } } diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 47587cc..af3b714 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -27,14 +27,22 @@ */ static void h3600_lcd_power(int enable) { - if (gpio_request(H3XXX_EGPIO_LCD_ON, "LCD power")) + if (gpio_request(H3XXX_EGPIO_LCD_ON, "LCD power")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_ON\n", __func__); goto err1; - if (gpio_request(H3600_EGPIO_LCD_PCI, "LCD control")) + } + if (gpio_request(H3600_EGPIO_LCD_PCI, "LCD control")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_PCI\n", __func__); goto err2; - if (gpio_request(H3600_EGPIO_LCD_5V_ON, "LCD 5v")) + } + if (gpio_request(H3600_EGPIO_LCD_5V_ON, "LCD 5v")) { + pr_err("%s: can't request H3XXX_EGPIO_LCD_5V_ON\n", __func__); goto err3; - if (gpio_request(H3600_EGPIO_LVDD_ON, "LCD 9v/-6.5v")) + } + if (gpio_request(H3600_EGPIO_LVDD_ON, "LCD 9v/-6.5v")) { + pr_err("%s: can't request H3600_EGPIO_LVDD_ON\n", __func__); goto err4; + } gpio_direction_output(H3XXX_EGPIO_LCD_ON, enable); gpio_direction_output(H3600_EGPIO_LCD_PCI, enable); diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c index a7a1982..b0784c9 100644 --- a/arch/arm/mach-sa1100/h3xxx.c +++ b/arch/arm/mach-sa1100/h3xxx.c @@ -83,8 +83,10 @@ static void h3xxx_set_vpp(int vpp) static int h3xxx_flash_init(void) { int err = gpio_request(H3XXX_EGPIO_VPP_ON, "Flash Vpp"); - if (err) + if (err) { + pr_err("%s: can't request H3XXX_EGPIO_VPP_ON\n", __func__); return err; + } err = gpio_direction_output(H3XXX_EGPIO_VPP_ON, 0); if (err) @@ -143,11 +145,15 @@ static u_int h3xxx_uart_get_mctrl(struct uart_port *port) static void h3xxx_uart_pm(struct uart_port *port, u_int state, u_int oldstate) { - if (port->mapbase == _Ser3UTCR0) + if (port->mapbase == _Ser3UTCR0) { if (!gpio_request(H3XXX_EGPIO_RS232_ON, "RS232 transceiver")) { gpio_direction_output(H3XXX_EGPIO_RS232_ON, !state); gpio_free(H3XXX_EGPIO_RS232_ON); + } else { + pr_err("%s: can't request H3XXX_EGPIO_RS232_ON\n", + __func__); } + } } /* -- cgit v0.10.2 From 1bf8e6219552d5dd27012d567ec8c4bb9c2d86b4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 6 Dec 2009 16:58:50 +0000 Subject: Fix soc_common PCMCIA configuration Jonathan Cameron reports that building PCMCIA as modules doesn't work: As module get a load of undefined symbols: ERROR: "soc_pcmcia_request_irqs" [drivers/pcmcia/pxa2xx_stargate2.ko] undefined! ERROR: "soc_pcmcia_free_irqs" [drivers/pcmcia/pxa2xx_stargate2.ko] undefined! ERROR: "soc_pcmcia_enable_irqs" [drivers/pcmcia/pxa2xx_stargate2.ko] undefined! ERROR: "soc_pcmcia_disable_irqs" [drivers/pcmcia/pxa2xx_stargate2.ko] undefined! ERROR: "soc_pcmcia_add_one" [drivers/pcmcia/pxa2xx_base.ko] undefined! ERROR: "soc_common_pcmcia_get_timing" [drivers/pcmcia/pxa2xx_base.ko] undefined! ERROR: "soc_pcmcia_remove_one" [drivers/pcmcia/pxa2xx_base.ko] undefined! make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 This is because soc_common tries to be built-in, but it should be a module. Allow soc_common to be a module. Reported-by: Jonathan Cameron Signed-off-by: Russell King diff --git a/drivers/pcmcia/Kconfig b/drivers/pcmcia/Kconfig index f3ccbcc..cd5082d 100644 --- a/drivers/pcmcia/Kconfig +++ b/drivers/pcmcia/Kconfig @@ -179,7 +179,7 @@ config PCMCIA_BCM63XX depends on BCM63XX && PCMCIA config PCMCIA_SOC_COMMON - bool + tristate config PCMCIA_SA1100 tristate "SA1100 support" -- cgit v0.10.2