From fad99fac2627e2cc0ebfe07fcb5046c0b4e103f9 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Wed, 20 Oct 2010 08:20:00 -0500 Subject: x86,kgdb: fix debugger hw breakpoint test regression in 2.6.35 HW breakpoints events stopped working correctly with kgdb as a result of commit: 018cbffe6819f6f8db20a0a3acd9bab9bfd667e4 (Merge commit 'v2.6.33' into perf/core), later commit: ba773f7c510c0b252145933926c636c439889207 (x86,kgdb: Fix hw breakpoint regression) allowed breakpoints to propagate to the debugger core but did not completely address the original regression in functionality found in 2.6.35. When the DR_STEP flag is set in dr6 along with any of the DR_TRAP bits, the kgdb exception handler will enter once from the hw_breakpoint API call back and again from the die notifier for do_debug(), which causes the debugger to stop twice and also for the kgdb regression tests to fail running under kvm with: echo V2I1 > /sys/module/kgdbts/parameters/kgdbts To address the problem, the kgdb overflow handler needs to implement the same logic as the ptrace overflow handler call back with respect to updating the virtual copy of dr6. This will allow the kgdb do_debug() die notifier to properly handle the exception and the attached debugger, or kgdb test suite, will only receive a single notification. Signed-off-by: Jason Wessel CC: Frederic Weisbecker CC: x86@kernel.org diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 852b819..497f973 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -621,7 +621,12 @@ int kgdb_arch_init(void) static void kgdb_hw_overflow_handler(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { - kgdb_ll_trap(DIE_DEBUG, "debug", regs, 0, 0, SIGTRAP); + struct task_struct *tsk = current; + int i; + + for (i = 0; i < 4; i++) + if (breakinfo[i].enabled) + tsk->thread.debugreg6 |= (DR_TRAP0 << i); } void kgdb_arch_late(void) -- cgit v0.10.2 From 16cdc628c3aed47d02205135b7e2f01e0064f566 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 6 Aug 2010 11:47:14 -0500 Subject: debug_core: move all watch dog syncs to a single function Move the various clock and watch dog syncs to a single function in advance of adding another sync for the rcu stall detector. Signed-off-by: Jason Wessel diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index de407c7..c812857 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -470,6 +470,12 @@ static void dbg_cpu_switch(int cpu, int next_cpu) kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER; } +static void dbg_touch_watchdogs(void) +{ + touch_softlockup_watchdog_sync(); + clocksource_touch_watchdog(); +} + static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) { unsigned long flags; @@ -523,8 +529,7 @@ return_normal: if (trace_on) tracing_on(); atomic_dec(&cpu_in_kgdb[cpu]); - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); + dbg_touch_watchdogs(); local_irq_restore(flags); return 0; } @@ -541,8 +546,7 @@ return_normal: (kgdb_info[cpu].task && kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); + dbg_touch_watchdogs(); local_irq_restore(flags); goto acquirelock; @@ -659,8 +663,7 @@ kgdb_restore: tracing_on(); /* Free kgdb_active */ atomic_set(&kgdb_active, -1); - touch_softlockup_watchdog_sync(); - clocksource_touch_watchdog(); + dbg_touch_watchdogs(); local_irq_restore(flags); return kgdb_info[cpu].ret_state; -- cgit v0.10.2 From fb70b5888b70b0b50f738fbfc019445493112eb1 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 13 Aug 2010 12:44:04 -0500 Subject: debug_core: stop rcu warnings on kernel resume When returning from the kernel debugger reset the rcu jiffies_stall value to prevent the rcu stall detector from sending NMI events which invoke a stack dump for each cpu in the system. Signed-off-by: Jason Wessel diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index c812857..5a3b04d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -474,6 +475,7 @@ static void dbg_touch_watchdogs(void) { touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); + rcu_cpu_stall_reset(); } static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) -- cgit v0.10.2 From f7030bbc446430ecd12c9ad02cf0ea94934e5f91 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 11 Oct 2010 10:20:14 -0500 Subject: kdb: Allow kernel loadable modules to add kdb shell functions In order to allow kernel modules to dynamically add a command to the kdb shell the kdb_register, kdb_register_repeat, kdb_unregister, and kdb_printf need to be exported as GPL symbols. Any kernel module that adds a dynamic kdb shell function should only need to include linux/kdb.h. Signed-off-by: Jason Wessel diff --git a/include/linux/kdb.h b/include/linux/kdb.h index ea6e524..deda197 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -28,6 +28,41 @@ extern int kdb_poll_idx; extern int kdb_initial_cpu; extern atomic_t kdb_event; +/* Types and messages used for dynamically added kdb shell commands */ + +#define KDB_MAXARGS 16 /* Maximum number of arguments to a function */ + +typedef enum { + KDB_REPEAT_NONE = 0, /* Do not repeat this command */ + KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ + KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ +} kdb_repeat_t; + +typedef int (*kdb_func_t)(int, const char **); + +/* KDB return codes from a command or internal kdb function */ +#define KDB_NOTFOUND (-1) +#define KDB_ARGCOUNT (-2) +#define KDB_BADWIDTH (-3) +#define KDB_BADRADIX (-4) +#define KDB_NOTENV (-5) +#define KDB_NOENVVALUE (-6) +#define KDB_NOTIMP (-7) +#define KDB_ENVFULL (-8) +#define KDB_ENVBUFFULL (-9) +#define KDB_TOOMANYBPT (-10) +#define KDB_TOOMANYDBREGS (-11) +#define KDB_DUPBPT (-12) +#define KDB_BPTNOTFOUND (-13) +#define KDB_BADMODE (-14) +#define KDB_BADINT (-15) +#define KDB_INVADDRFMT (-16) +#define KDB_BADREG (-17) +#define KDB_BADCPUNUM (-18) +#define KDB_BADLENGTH (-19) +#define KDB_NOBP (-20) +#define KDB_BADADDR (-21) + /* * kdb_diemsg * @@ -105,9 +140,17 @@ int kdb_process_cpu(const struct task_struct *p) /* kdb access to register set for stack dumping */ extern struct pt_regs *kdb_current_regs; +/* Dynamic kdb shell command registration */ +extern int kdb_register(char *, kdb_func_t, char *, char *, short); +extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, + short, kdb_repeat_t); +extern int kdb_unregister(char *); #else /* ! CONFIG_KGDB_KDB */ #define kdb_printf(...) #define kdb_init(x) +#define kdb_register(...) +#define kdb_register_repeat(...) +#define kdb_uregister(x) #endif /* CONFIG_KGDB_KDB */ enum { KDB_NOT_INITIALIZED, diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index c9b7f4f9..96fdaac 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c @@ -823,4 +823,4 @@ int kdb_printf(const char *fmt, ...) return r; } - +EXPORT_SYMBOL_GPL(kdb_printf); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index caf057a..5448990 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2783,6 +2783,8 @@ int kdb_register_repeat(char *cmd, return 0; } +EXPORT_SYMBOL_GPL(kdb_register_repeat); + /* * kdb_register - Compatibility register function for commands that do @@ -2805,6 +2807,7 @@ int kdb_register(char *cmd, return kdb_register_repeat(cmd, func, usage, help, minlen, KDB_REPEAT_NONE); } +EXPORT_SYMBOL_GPL(kdb_register); /* * kdb_unregister - This function is used to unregister a kernel @@ -2833,6 +2836,7 @@ int kdb_unregister(char *cmd) /* Couldn't find it. */ return 1; } +EXPORT_SYMBOL_GPL(kdb_unregister); /* Initialize the kdb command table. */ static void __init kdb_inittab(void) diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index be775f7..1921e6e 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -15,29 +15,6 @@ #include #include "../debug_core.h" -/* Kernel Debugger Error codes. Must not overlap with command codes. */ -#define KDB_NOTFOUND (-1) -#define KDB_ARGCOUNT (-2) -#define KDB_BADWIDTH (-3) -#define KDB_BADRADIX (-4) -#define KDB_NOTENV (-5) -#define KDB_NOENVVALUE (-6) -#define KDB_NOTIMP (-7) -#define KDB_ENVFULL (-8) -#define KDB_ENVBUFFULL (-9) -#define KDB_TOOMANYBPT (-10) -#define KDB_TOOMANYDBREGS (-11) -#define KDB_DUPBPT (-12) -#define KDB_BPTNOTFOUND (-13) -#define KDB_BADMODE (-14) -#define KDB_BADINT (-15) -#define KDB_INVADDRFMT (-16) -#define KDB_BADREG (-17) -#define KDB_BADCPUNUM (-18) -#define KDB_BADLENGTH (-19) -#define KDB_NOBP (-20) -#define KDB_BADADDR (-21) - /* Kernel Debugger Command codes. Must not overlap with error codes. */ #define KDB_CMD_GO (-1001) #define KDB_CMD_CPU (-1002) @@ -93,17 +70,6 @@ */ #define KDB_MAXBPT 16 -/* Maximum number of arguments to a function */ -#define KDB_MAXARGS 16 - -typedef enum { - KDB_REPEAT_NONE = 0, /* Do not repeat this command */ - KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ - KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ -} kdb_repeat_t; - -typedef int (*kdb_func_t)(int, const char **); - /* Symbol table format returned by kallsyms. */ typedef struct __ksymtab { unsigned long value; /* Address of symbol */ @@ -123,11 +89,6 @@ extern int kallsyms_symbol_next(char *prefix_name, int flag); extern int kallsyms_symbol_complete(char *prefix_name, int max_len); /* Exported Symbols for kernel loadable modules to use. */ -extern int kdb_register(char *, kdb_func_t, char *, char *, short); -extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, - short, kdb_repeat_t); -extern int kdb_unregister(char *); - extern int kdb_getarea_size(void *, unsigned long, size_t); extern int kdb_putarea_size(unsigned long, void *, size_t); -- cgit v0.10.2 From e3bda3ac33d3bf3e5a4049e2cabe82d3caaffc26 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 11 Oct 2010 10:20:14 -0500 Subject: kdb,ftdump: Remove reference to internal kdb include Now that include/linux/kdb.h properly exports all the functions required to dynamically add a kdb shell command, the reference to the private kdb header can be removed. Signed-off-by: Jason Wessel diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 7b8ecd7..3c5c5df 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c @@ -13,7 +13,6 @@ #include #include -#include "../debug/kdb/kdb_private.h" #include "trace.h" #include "trace_output.h" -- cgit v0.10.2 From 75d14edee5689716b55afe467acfc13206a31f95 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 11 Oct 2010 10:20:14 -0500 Subject: kdb: Fix oops in kdb_unregister Nothing should try to use kdb_commands directly as sometimes it is null. Instead, use the for_each_kdbcmd() iterator. This particular problem dates back to the initial kdb merge (2.6.35), but at that point nothing was dynamically unregistering commands from the kdb shell. Signed-off-by: Jason Wessel diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 5448990..4226f32 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2826,7 +2826,7 @@ int kdb_unregister(char *cmd) /* * find the command. */ - for (i = 0, kp = kdb_commands; i < kdb_max_commands; i++, kp++) { + for_each_kdbcmd(kp, i) { if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { kp->cmd_name = NULL; return 0; -- cgit v0.10.2 From 91b152aa85bbcf076e269565394c31964f940371 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Mon, 23 Aug 2010 09:20:14 -0500 Subject: kdb,kgdb: fix sparse fixups Fix the following sparse warnings: kdb_main.c:328:5: warning: symbol 'kdbgetu64arg' was not declared. Should it be static? kgdboc.c:246:12: warning: symbol 'kgdboc_early_init' was not declared. Should it be static? kgdb.c:652:26: warning: incorrect type in argument 1 (different address spaces) kgdb.c:652:26: expected void const *ptr kgdb.c:652:26: got struct perf_event *[noderef] *pev The one in kgdb.c required the (void * __force) because of the return code from register_wide_hw_breakpoint looking like: return (void __percpu __force *)ERR_PTR(err); Signed-off-by: Jason Wessel diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 497f973..101bf22 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -649,7 +649,7 @@ void kgdb_arch_late(void) if (breakinfo[i].pev) continue; breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); - if (IS_ERR(breakinfo[i].pev)) { + if (IS_ERR((void * __force)breakinfo[i].pev)) { printk(KERN_ERR "kgdb: Could not allocate hw" "breakpoints\nDisabling the kernel debugger\n"); breakinfo[i].pev = NULL; diff --git a/drivers/serial/kgdboc.c b/drivers/serial/kgdboc.c index 39f9a1a..d4b711c 100644 --- a/drivers/serial/kgdboc.c +++ b/drivers/serial/kgdboc.c @@ -243,7 +243,7 @@ static struct kgdb_io kgdboc_io_ops = { #ifdef CONFIG_KGDB_SERIAL_CONSOLE /* This is only available if kgdboc is a built in for early debugging */ -int __init kgdboc_early_init(char *opt) +static int __init kgdboc_early_init(char *opt) { /* save the first character of the config string because the * init routine can destroy it. diff --git a/include/linux/kdb.h b/include/linux/kdb.h index deda197..aadff7c 100644 --- a/include/linux/kdb.h +++ b/include/linux/kdb.h @@ -139,6 +139,14 @@ int kdb_process_cpu(const struct task_struct *p) /* kdb access to register set for stack dumping */ extern struct pt_regs *kdb_current_regs; +#ifdef CONFIG_KALLSYMS +extern const char *kdb_walk_kallsyms(loff_t *pos); +#else /* ! CONFIG_KALLSYMS */ +static inline const char *kdb_walk_kallsyms(loff_t *pos) +{ + return NULL; +} +#endif /* ! CONFIG_KALLSYMS */ /* Dynamic kdb shell command registration */ extern int kdb_register(char *, kdb_func_t, char *, char *, short); diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index 1921e6e..35d69ed 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -105,6 +105,7 @@ extern int kdb_getword(unsigned long *, unsigned long, size_t); extern int kdb_putword(unsigned long, unsigned long, size_t); extern int kdbgetularg(const char *, unsigned long *); +extern int kdbgetu64arg(const char *, u64 *); extern char *kdbgetenv(const char *); extern int kdbgetaddrarg(int, const char **, int*, unsigned long *, long *, char **); @@ -216,14 +217,6 @@ extern void kdb_ps1(const struct task_struct *p); extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); -#ifdef CONFIG_KALLSYMS -extern const char *kdb_walk_kallsyms(loff_t *pos); -#else /* ! CONFIG_KALLSYMS */ -static inline const char *kdb_walk_kallsyms(loff_t *pos) -{ - return NULL; -} -#endif /* ! CONFIG_KALLSYMS */ extern char *kdb_getstr(char *, size_t, char *); /* Defines for kdb_symbol_print */ -- cgit v0.10.2 From c1bb9a9c1911036549c5cdfb23f32d7d20ffdc5a Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Mon, 13 Sep 2010 06:58:00 -0500 Subject: debug_core: disable hw_breakpoints on all cores in kgdb_cpu_enter() The slave cpus do not have the hw breakpoints disabled upon entry to the debug_core and as a result could cause unrecoverable recursive faults on badly placed breakpoints, or get out of sync with the arch specific hw breakpoint operations. This patch addresses the problem by invoking kgdb_disable_hw_debug() earlier in kgdb_enter_cpu for each cpu that enters the debug core. The hw breakpoint dis/enable flow should be: master_debug_cpu slave_debug_cpu \ / kgdb_cpu_enter | kgdb_disable_hw_debug --> uninstall pre-enabled hw_breakpoint | do add/rm dis/enable operates to hw_breakpoints on master_debug_cpu.. | correct_hw_break --> correct/install the enabled hw_breakpoint | leave_kgdb Signed-off-by: Dongdong Deng Signed-off-by: Jason Wessel diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 5a3b04d..bb94977 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -485,6 +485,9 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) int error; int i, cpu; int trace_on = 0; + + kgdb_disable_hw_debug(ks->linux_regs); + acquirelock: /* * Interrupts will be restored by the 'trap return' code, except when @@ -569,8 +572,6 @@ return_normal: if (dbg_io_ops->pre_exception) dbg_io_ops->pre_exception(); - kgdb_disable_hw_debug(ks->linux_regs); - /* * Get the passive CPU lock which will hold all the non-primary * CPU in a spin state while the debugger is active @@ -661,6 +662,8 @@ kgdb_restore: else kgdb_sstep_pid = 0; } + if (arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); if (trace_on) tracing_on(); /* Free kgdb_active */ -- cgit v0.10.2 From 39a0715f5ace92268190c89e246fd1cf741dbaea Mon Sep 17 00:00:00 2001 From: Dongdong Deng Date: Mon, 13 Sep 2010 06:58:00 -0500 Subject: x86,kgdb: remove unnecessary call to kgdb_correct_hw_break() The kernel debug_core invokes hw breakpoint install and removal via call backs. The architecture specific kgdb stubs only need to implement the call backs and not actually call the functions. Signed-off-by: Dongdong Deng Signed-off-by: Jason Wessel CC: x86@kernel.org CC: Thomas Gleixner CC: Ingo Molnar CC: H. Peter Anvin diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 101bf22..d81cfeb 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -477,8 +477,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, raw_smp_processor_id()); } - kgdb_correct_hw_break(); - return 0; } -- cgit v0.10.2 From dfee3a7b92208b30f77876068aece9ea571270c2 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 21 May 2010 08:46:00 -0500 Subject: debug_core: refactor locking for master/slave cpus For quite some time there have been problems with memory barriers and various races with NMI on multi processor systems using the kernel debugger. The algorithm for entering the kernel debug core and resuming kernel execution was racy and had several known edge case problems with attempting to debug something on a heavily loaded system using breakpoints that are hit repeatedly and quickly. The prior "locking" design entry worked as follows: * The atomic counter kgdb_active was used with atomic exchange in order to elect a master cpu out of all the cpus that may have taken a debug exception. * The master cpu increments all elements of passive_cpu_wait[]. * The master cpu issues the round up cpus message. * Each "slave cpu" that enters the debug core increments its own element in cpu_in_kgdb[]. * Each "slave cpu" spins on passive_cpu_wait[] until it becomes 0. * The master cpu debugs the system. The new scheme removes the two arrays of atomic counters and replaces them with 2 single counters. One counter is used to count the number of cpus waiting to become a master cpu (because one or more hit an exception). The second counter is use to indicate how many cpus have entered as slave cpus. The new entry logic works as follows: * One or more cpus enters via kgdb_handle_exception() and increments the masters_in_kgdb. Each cpu attempts to get the spin lock called dbg_master_lock. * The master cpu sets kgdb_active to the current cpu. * The master cpu takes the spinlock dbg_slave_lock. * The master cpu asks to round up all the other cpus. * Each slave cpu that is not already in kgdb_handle_exception() will enter and increment slaves_in_kgdb. Each slave will now spin try_locking on dbg_slave_lock. * The master cpu waits for the sum of masters_in_kgdb and slaves_in_kgdb to be equal to the sum of the online cpus. * The master cpu debugs the system. In the new design the kgdb_active can only be changed while holding dbg_master_lock. Stress testing has not turned up any further entry/exit races that existed in the prior locking design. The prior locking design suffered from atomic variables not being truly atomic (in the capacity as used by kgdb) along with memory barrier races. Signed-off-by: Jason Wessel Acked-by: Dongdong Deng diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index bb94977..26dbdc3 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -110,13 +110,15 @@ static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = { */ atomic_t kgdb_active = ATOMIC_INIT(-1); EXPORT_SYMBOL_GPL(kgdb_active); +static DEFINE_RAW_SPINLOCK(dbg_master_lock); +static DEFINE_RAW_SPINLOCK(dbg_slave_lock); /* * We use NR_CPUs not PERCPU, in case kgdb is used to debug early * bootup code (which might not have percpu set up yet): */ -static atomic_t passive_cpu_wait[NR_CPUS]; -static atomic_t cpu_in_kgdb[NR_CPUS]; +static atomic_t masters_in_kgdb; +static atomic_t slaves_in_kgdb; static atomic_t kgdb_break_tasklet_var; atomic_t kgdb_setting_breakpoint; @@ -478,14 +480,23 @@ static void dbg_touch_watchdogs(void) rcu_cpu_stall_reset(); } -static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) +static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, + int exception_state) { unsigned long flags; int sstep_tries = 100; int error; - int i, cpu; + int cpu; int trace_on = 0; + int online_cpus = num_online_cpus(); + kgdb_info[ks->cpu].enter_kgdb++; + kgdb_info[ks->cpu].exception_state |= exception_state; + + if (exception_state == DCPU_WANT_MASTER) + atomic_inc(&masters_in_kgdb); + else + atomic_inc(&slaves_in_kgdb); kgdb_disable_hw_debug(ks->linux_regs); acquirelock: @@ -500,14 +511,15 @@ acquirelock: kgdb_info[cpu].task = current; kgdb_info[cpu].ret_state = 0; kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT; - /* - * Make sure the above info reaches the primary CPU before - * our cpu_in_kgdb[] flag setting does: - */ - atomic_inc(&cpu_in_kgdb[cpu]); - if (exception_level == 1) + /* Make sure the above info reaches the primary CPU */ + smp_mb(); + + if (exception_level == 1) { + if (raw_spin_trylock(&dbg_master_lock)) + atomic_xchg(&kgdb_active, cpu); goto cpu_master_loop; + } /* * CPU will loop if it is a slave or request to become a kgdb @@ -519,10 +531,12 @@ cpu_loop: kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER; goto cpu_master_loop; } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { - if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu) + if (raw_spin_trylock(&dbg_master_lock)) { + atomic_xchg(&kgdb_active, cpu); break; + } } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { - if (!atomic_read(&passive_cpu_wait[cpu])) + if (!raw_spin_is_locked(&dbg_slave_lock)) goto return_normal; } else { return_normal: @@ -533,7 +547,11 @@ return_normal: arch_kgdb_ops.correct_hw_break(); if (trace_on) tracing_on(); - atomic_dec(&cpu_in_kgdb[cpu]); + kgdb_info[cpu].exception_state &= + ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); + kgdb_info[cpu].enter_kgdb--; + smp_mb__before_atomic_dec(); + atomic_dec(&slaves_in_kgdb); dbg_touch_watchdogs(); local_irq_restore(flags); return 0; @@ -551,6 +569,7 @@ return_normal: (kgdb_info[cpu].task && kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { atomic_set(&kgdb_active, -1); + raw_spin_unlock(&dbg_master_lock); dbg_touch_watchdogs(); local_irq_restore(flags); @@ -576,10 +595,8 @@ return_normal: * Get the passive CPU lock which will hold all the non-primary * CPU in a spin state while the debugger is active */ - if (!kgdb_single_step) { - for (i = 0; i < NR_CPUS; i++) - atomic_inc(&passive_cpu_wait[i]); - } + if (!kgdb_single_step) + raw_spin_lock(&dbg_slave_lock); #ifdef CONFIG_SMP /* Signal the other CPUs to enter kgdb_wait() */ @@ -590,10 +607,9 @@ return_normal: /* * Wait for the other CPUs to be notified and be waiting for us: */ - for_each_online_cpu(i) { - while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i])) - cpu_relax(); - } + while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) + + atomic_read(&slaves_in_kgdb)) != online_cpus) + cpu_relax(); /* * At this point the primary processor is completely @@ -634,24 +650,11 @@ cpu_master_loop: if (dbg_io_ops->post_exception) dbg_io_ops->post_exception(); - atomic_dec(&cpu_in_kgdb[ks->cpu]); - if (!kgdb_single_step) { - for (i = NR_CPUS-1; i >= 0; i--) - atomic_dec(&passive_cpu_wait[i]); - /* - * Wait till all the CPUs have quit from the debugger, - * but allow a CPU that hit an exception and is - * waiting to become the master to remain in the debug - * core. - */ - for_each_online_cpu(i) { - while (kgdb_do_roundup && - atomic_read(&cpu_in_kgdb[i]) && - !(kgdb_info[i].exception_state & - DCPU_WANT_MASTER)) - cpu_relax(); - } + raw_spin_unlock(&dbg_slave_lock); + /* Wait till all the CPUs have quit from the debugger. */ + while (kgdb_do_roundup && atomic_read(&slaves_in_kgdb)) + cpu_relax(); } kgdb_restore: @@ -666,8 +669,15 @@ kgdb_restore: arch_kgdb_ops.correct_hw_break(); if (trace_on) tracing_on(); + + kgdb_info[cpu].exception_state &= + ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); + kgdb_info[cpu].enter_kgdb--; + smp_mb__before_atomic_dec(); + atomic_dec(&masters_in_kgdb); /* Free kgdb_active */ atomic_set(&kgdb_active, -1); + raw_spin_unlock(&dbg_master_lock); dbg_touch_watchdogs(); local_irq_restore(flags); @@ -686,7 +696,6 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) { struct kgdb_state kgdb_var; struct kgdb_state *ks = &kgdb_var; - int ret; ks->cpu = raw_smp_processor_id(); ks->ex_vector = evector; @@ -697,11 +706,10 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) if (kgdb_reenter_check(ks)) return 0; /* Ouch, double exception ! */ - kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER; - ret = kgdb_cpu_enter(ks, regs); - kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER | - DCPU_IS_SLAVE); - return ret; + if (kgdb_info[ks->cpu].enter_kgdb != 0) + return 0; + + return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); } int kgdb_nmicallback(int cpu, void *regs) @@ -714,12 +722,9 @@ int kgdb_nmicallback(int cpu, void *regs) ks->cpu = cpu; ks->linux_regs = regs; - if (!atomic_read(&cpu_in_kgdb[cpu]) && - atomic_read(&kgdb_active) != -1 && - atomic_read(&kgdb_active) != cpu) { - kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; - kgdb_cpu_enter(ks, regs); - kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE; + if (kgdb_info[ks->cpu].enter_kgdb == 0 && + raw_spin_is_locked(&dbg_master_lock)) { + kgdb_cpu_enter(ks, regs, DCPU_IS_SLAVE); return 0; } #endif diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h index c5d753d..3494c28 100644 --- a/kernel/debug/debug_core.h +++ b/kernel/debug/debug_core.h @@ -40,6 +40,7 @@ struct debuggerinfo_struct { int exception_state; int ret_state; int irq_depth; + int enter_kgdb; }; extern struct debuggerinfo_struct kgdb_info[]; -- cgit v0.10.2 From 495363d380b4f4745bd8677912688654afc44020 Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 21 May 2010 08:46:00 -0500 Subject: kdb,debug_core: adjust master cpu switch logic against new debug_core locking The kdb shell needs to enforce switching back to the original CPU that took the exception before restoring normal kernel execution. Resuming from a different CPU than what took the original exception will cause problems with spin locks that are freed from the a different processor than had taken the lock. The special logic in dbg_cpu_switch() can go away entirely with because the state of what cpus want to be masters or slaves will remain unchanged between entry and exit of the debug_core exception context. Signed-off-by: Jason Wessel diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 26dbdc3..fec596d 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -460,19 +460,6 @@ static int kgdb_reenter_check(struct kgdb_state *ks) return 1; } -static void dbg_cpu_switch(int cpu, int next_cpu) -{ - /* Mark the cpu we are switching away from as a slave when it - * holds the kgdb_active token. This must be done so that the - * that all the cpus wait in for the debug core will not enter - * again as the master. */ - if (cpu == atomic_read(&kgdb_active)) { - kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; - kgdb_info[cpu].exception_state &= ~DCPU_WANT_MASTER; - } - kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER; -} - static void dbg_touch_watchdogs(void) { touch_softlockup_watchdog_sync(); @@ -638,7 +625,8 @@ cpu_master_loop: if (error == DBG_PASS_EVENT) { dbg_kdb_mode = !dbg_kdb_mode; } else if (error == DBG_SWITCH_CPU_EVENT) { - dbg_cpu_switch(cpu, dbg_switch_cpu); + kgdb_info[dbg_switch_cpu].exception_state |= + DCPU_NEXT_MASTER; goto cpu_loop; } else { kgdb_info[cpu].ret_state = error; diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index bf6e827..dd0b1b7 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c @@ -86,7 +86,7 @@ int kdb_stub(struct kgdb_state *ks) } /* Set initial kdb state variables */ KDB_STATE_CLEAR(KGDB_TRANS); - kdb_initial_cpu = ks->cpu; + kdb_initial_cpu = atomic_read(&kgdb_active); kdb_current_task = kgdb_info[ks->cpu].task; kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo; /* Remove any breakpoints as needed by kdb and clear single step */ @@ -105,7 +105,6 @@ int kdb_stub(struct kgdb_state *ks) ks->pass_exception = 1; KDB_FLAG_SET(CATASTROPHIC); } - kdb_initial_cpu = ks->cpu; if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) { KDB_STATE_CLEAR(SSBPT); KDB_STATE_CLEAR(DOING_SS); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 4226f32..d7bda21 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -1749,13 +1749,13 @@ static int kdb_go(int argc, const char **argv) int nextarg; long offset; + if (raw_smp_processor_id() != kdb_initial_cpu) { + kdb_printf("go must execute on the entry cpu, " + "please use \"cpu %d\" and then execute go\n", + kdb_initial_cpu); + return KDB_BADCPUNUM; + } if (argc == 1) { - if (raw_smp_processor_id() != kdb_initial_cpu) { - kdb_printf("go
must be issued from the " - "initial cpu, do cpu %d first\n", - kdb_initial_cpu); - return KDB_ARGCOUNT; - } nextarg = 1; diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL); -- cgit v0.10.2