From 512e67f991c8886de75a65b854d7c19a55fb5b8a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:11 +0200
Subject: lockdep: maintainers

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/MAINTAINERS b/MAINTAINERS
index 012fa83..0f60fdb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2393,6 +2393,15 @@ M:	khali@linux-fr.org
 L:	lm-sensors@lm-sensors.org
 S:	Maintained
 
+LOCKDEP AND LOCKSTAT
+P:	Peter Zijlstra
+M:	peterz@infradead.org
+P:	Ingo Molnar
+M:	mingo@redhat.com
+L:	linux-kernel@vger.kernel.org
+T:	git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-lockdep.git
+S:	Maintained
+
 LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks)
 P:	Richard Russon (FlatCap)
 M:	ldm@flatcap.org

-- cgit v0.10.2

From 94c61c0aeffe64452e742087cf803d0347ef8418 Mon Sep 17 00:00:00 2001
From: Tim Pepper
Date: Thu, 11 Oct 2007 22:11:11 +0200
Subject: lockdep: Avoid /proc/lockdep & lock_stat infinite output

Both /proc/lockdep and /proc/lock_stat output may loop infinitely.

When a read() requests an amount of data smaller than the amount of data
that the seq_file's foo_show() outputs, the output starts looping and
outputs the "stuck" element's data infinitely.  There may be multiple
sequential calls to foo_start(), foo_next()/foo_show(), and foo_stop()
for a single open with sequential read of the file.  The _start() does
not have to start with the 0th element and _show() might be called
multiple times in a row for the same element for a given open/read of
the seq_file.

Also, header output should not be happening in _start(); all output
should be in _show(), which is what SEQ_START_TOKEN is meant to enable.
Having output in _start() may also negatively impact seq_file's
seq_read() and traverse() accounting.

Signed-off-by: Tim Pepper
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
Cc: Ingo Molnar
Cc: Al Viro

diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index c851b2d..8a135bd 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -25,28 +25,38 @@ static void *l_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct lock_class *class = v;
+	struct lock_class *class;
 
 	(*pos)++;
 
-	if (class->lock_entry.next != &all_lock_classes)
-		class = list_entry(class->lock_entry.next, struct lock_class,
-				   lock_entry);
-	else
-		class = NULL;
-	m->private = class;
+	if (v == SEQ_START_TOKEN)
+		class = m->private;
+	else {
+		class = v;
+
+		if (class->lock_entry.next != &all_lock_classes)
+			class = list_entry(class->lock_entry.next,
+					   struct lock_class, lock_entry);
+		else
+			class = NULL;
+	}
 
 	return class;
 }
 
 static void *l_start(struct seq_file *m, loff_t *pos)
 {
-	struct lock_class *class = m->private;
+	struct lock_class *class;
+	loff_t i = 0;
 
-	if (&class->lock_entry == all_lock_classes.next)
-		seq_printf(m, "all lock classes:\n");
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
 
-	return class;
+	list_for_each_entry(class, &all_lock_classes, lock_entry) {
+		if (++i == *pos)
+			return class;
+	}
+	return NULL;
 }
 
 static void l_stop(struct seq_file *m, void *v)
@@ -101,10 +111,15 @@ static void print_name(struct seq_file *m, struct lock_class *class)
 static int l_show(struct seq_file *m, void *v)
 {
 	unsigned long nr_forward_deps, nr_backward_deps;
-	struct lock_class *class = m->private;
+	struct lock_class *class = v;
 	struct lock_list *entry;
 	char c1, c2, c3, c4;
 
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(m, "all lock classes:\n");
+		return 0;
+	}
+
 	seq_printf(m, "%p", class->key);
 #ifdef CONFIG_DEBUG_LOCKDEP
 	seq_printf(m, " OPS:%8ld", class->ops);
@@ -523,10 +538,11 @@ static void *ls_start(struct seq_file *m, loff_t *pos)
 {
 	struct lock_stat_seq *data = m->private;
 
-	if (data->iter == data->stats)
-		seq_header(m);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
 
-	if (data->iter == data->iter_end)
+	data->iter = data->stats + *pos;
+	if (data->iter >= data->iter_end)
 		data->iter = NULL;
 
 	return data->iter;
@@ -538,8 +554,13 @@ static void *ls_next(struct seq_file *m, void *v, loff_t *pos)
 
 	(*pos)++;
 
-	data->iter = v;
-	data->iter++;
+	if (v == SEQ_START_TOKEN)
+		data->iter = data->stats;
+	else {
+		data->iter = v;
+		data->iter++;
+	}
+
 	if (data->iter == data->iter_end)
 		data->iter = NULL;
 
@@ -552,9 +573,11 @@ static void ls_stop(struct seq_file *m, void *v)
 
 static int ls_show(struct seq_file *m, void *v)
 {
-	struct lock_stat_seq *data = m->private;
+	if (v == SEQ_START_TOKEN)
+		seq_header(m);
+	else
+		seq_stats(m, v);
 
-	seq_stats(m, data->iter);
 	return 0;
 }

-- cgit v0.10.2

From 3aa416b07f0adf01c090baab26fb70c35ec17623 Mon Sep 17 00:00:00 2001
From: Gregory Haskins
Date: Thu, 11 Oct 2007 22:11:11 +0200
Subject: lockdep: fix mismatched lockdep_depth/curr_chain_hash

It is possible for the current->curr_chain_key to become inconsistent
with the current index if the chain fails to validate.  The end result
is that future lock_acquire() operations may inadvertently fail to find
a hit in the cache, resulting in a new node being added to the graph
for every acquire.

Signed-off-by: Gregory Haskins
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 734da57..42ae4a5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1521,7 +1521,7 @@ cache_hit:
 }
 
 static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
-		struct held_lock *hlock, int chain_head)
+		struct held_lock *hlock, int chain_head, u64 chain_key)
 {
 	/*
 	 * Trylock needs to maintain the stack of held locks, but it
@@ -1534,7 +1534,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 	 * graph_lock for us)
 	 */
 	if (!hlock->trylock && (hlock->check == 2) &&
-			lookup_chain_cache(curr->curr_chain_key, hlock->class)) {
+			lookup_chain_cache(chain_key, hlock->class)) {
 		/*
 		 * Check whether last held lock:
 		 *
@@ -1576,7 +1576,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 #else
 static inline int validate_chain(struct task_struct *curr,
 		struct lockdep_map *lock, struct held_lock *hlock,
-		int chain_head)
+		int chain_head, u64 chain_key)
 {
 	return 1;
 }
@@ -2450,11 +2450,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		chain_head = 1;
 	}
 	chain_key = iterate_chain_key(chain_key, id);
-	curr->curr_chain_key = chain_key;
 
-	if (!validate_chain(curr, lock, hlock, chain_head))
+	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
 		return 0;
 
+	curr->curr_chain_key = chain_key;
 	curr->lockdep_depth++;
 	check_chain_key(curr);
 #ifdef CONFIG_DEBUG_LOCKDEP

-- cgit v0.10.2

From e4564f79d4b6923da7360df4b24a48cc2d4160de Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: fixup mutex annotations

The fancy mutex_lock fastpath has too many indirections to track the
caller, hence all contentions are perceived to come from mutex_lock().

Avoid this by explicitly not using the fastpath code (it was disabled
already anyway).
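To make the effect concrete, here is a small illustrative user-space sketch
(not from this patch set; lock_entry(), lock_common() and lock_report() are
invented names): contention can only be attributed to the real call site when
the outermost entry point captures the caller's address and hands it down to
the shared slowpath, which is what defining mutex_lock() as
mutex_lock_nested(lock, 0) and passing _RET_IP_ into __mutex_lock_common()
achieves.

	#include <stdio.h>

	/* stand-in for lock_contended(): lock_stat aggregates events by this address */
	static void lock_report(const void *ip)
	{
		printf("contention attributed to caller at %p\n", ip);
	}

	/* shared slowpath: must use the ip handed down by its caller ... */
	static void lock_common(const void *ip)
	{
		lock_report(ip);
	}

	/* ... because capturing it here would always point at lock_entry() itself */
	__attribute__((noinline)) static void lock_entry(void)
	{
		lock_common(__builtin_return_address(0));
	}

	int main(void)
	{
		lock_entry();	/* the report names this call site, not lock_entry() */
		return 0;
	}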
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 0d50ea3..6a735c7 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -120,14 +120,17 @@ static inline int fastcall mutex_is_locked(struct mutex *lock)
  * See kernel/mutex.c for detailed documentation of these APIs.
  * Also see Documentation/mutex-design.txt.
  */
-extern void fastcall mutex_lock(struct mutex *lock);
-extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
 extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
 					unsigned int subclass);
+
+#define mutex_lock(lock) mutex_lock_nested(lock, 0)
+#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
 #else
+extern void fastcall mutex_lock(struct mutex *lock);
+extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
+
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 #endif
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 691b865..d7fe50c 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -51,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 
 EXPORT_SYMBOL(__mutex_init);
 
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * We split the mutex lock/unlock logic into separate fastpath and
  * slowpath functions, to reduce the register pressure on the fastpath.
@@ -92,6 +93,7 @@ void inline fastcall __sched mutex_lock(struct mutex *lock)
 }
 
 EXPORT_SYMBOL(mutex_lock);
+#endif
 
 static void fastcall noinline __sched
 __mutex_unlock_slowpath(atomic_t *lock_count);
@@ -122,7 +124,8 @@ EXPORT_SYMBOL(mutex_unlock);
  * Lock a mutex (possibly interruptible), slowpath:
  */
 static inline int __sched
-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
+__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+		unsigned long ip)
 {
 	struct task_struct *task = current;
 	struct mutex_waiter waiter;
@@ -132,7 +135,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 	spin_lock_mutex(&lock->wait_lock, flags);
 
 	debug_mutex_lock_common(lock, &waiter);
-	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	mutex_acquire(&lock->dep_map, subclass, 0, ip);
 	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
 
 	/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -143,7 +146,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 	if (old_val == 1)
 		goto done;
 
-	lock_contended(&lock->dep_map, _RET_IP_);
+	lock_contended(&lock->dep_map, ip);
 
 	for (;;) {
 		/*
@@ -166,7 +169,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 		if (unlikely(state == TASK_INTERRUPTIBLE &&
 					signal_pending(task))) {
 			mutex_remove_waiter(lock, &waiter, task_thread_info(task));
-			mutex_release(&lock->dep_map, 1, _RET_IP_);
+			mutex_release(&lock->dep_map, 1, ip);
 			spin_unlock_mutex(&lock->wait_lock, flags);
 
 			debug_mutex_free_waiter(&waiter);
@@ -197,20 +200,12 @@ done:
 	return 0;
 }
 
-static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count)
-{
-	struct mutex *lock = container_of(lock_count, struct mutex, count);
-
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
-}
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void __sched
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -219,7 +214,7 @@ int __sched
 mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -271,6 +266,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count)
 	__mutex_unlock_common_slowpath(lock_count, 1);
 }
 
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Here come the less common (and hence less performance-critical) APIs:
  * mutex_lock_interruptible() and mutex_trylock().
@@ -298,13 +294,22 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
 
 EXPORT_SYMBOL(mutex_lock_interruptible);
 
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count)
+{
+	struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
+}
+
 static int fastcall noinline __sched
 __mutex_lock_interruptible_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
 }
+#endif
 
 /*
  * Spinlock based trylock, we take the spinlock and check whether we

-- cgit v0.10.2

From b351d164e860d1ffffdc501c32f55dd1446c385b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: syscall exit check

Provide a check to validate that we do not hold any locks when
switching back to user-space.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 0e843bf..9dc46db 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -238,6 +238,7 @@ extern void lockdep_info(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
+extern void lockdep_sys_exit(void);
 
 extern void lockdep_off(void);
 extern void lockdep_on(void);
@@ -317,6 +318,7 @@ static inline void lockdep_on(void)
 # define INIT_LOCKDEP
 # define lockdep_reset()		do { debug_locks = 1; } while (0)
 # define lockdep_free_key_range(start, size)	do { } while (0)
+# define lockdep_sys_exit()		do { } while (0)
 /*
  * The class key takes no space if lockdep is disabled:
  */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 42ae4a5..a6f1ee9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3199,3 +3199,19 @@ void debug_show_held_locks(struct task_struct *task)
 }
 
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
+
+void lockdep_sys_exit(void)
+{
+	struct task_struct *curr = current;
+
+	if (unlikely(curr->lockdep_depth)) {
+		if (!debug_locks_off())
+			return;
+		printk("\n================================================\n");
+		printk(  "[ BUG: lock held when returning to user space! ]\n");
+		printk(  "------------------------------------------------\n");
+		printk("%s/%d is leaving the kernel with locks still held!\n",
+				curr->comm, curr->pid);
+		lockdep_print_held_locks(curr);
+	}
+}

-- cgit v0.10.2

From c7e872e7da5514d014707a407ea562d197cc0136 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: i386: connect the sysexit hook

Run the lockdep_sys_exit hook after all other C code on the syscall
return path.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 290b7bc..8099fea0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -251,6 +251,7 @@ check_userspace:
 	jb resume_kernel		# not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -338,6 +339,7 @@ sysenter_past_esp:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
@@ -377,6 +379,7 @@ syscall_call:
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)		# store the return value
 syscall_exit:
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -467,6 +470,7 @@ work_pending:
 	jz work_notifysig
 work_resched:
 	call schedule
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h
index eff8585..d058b04 100644
--- a/include/asm-x86/irqflags_32.h
+++ b/include/asm-x86/irqflags_32.h
@@ -160,4 +160,17 @@ static inline int raw_irqs_disabled(void)
 # define TRACE_IRQS_OFF
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LOCKDEP_SYS_EXIT			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call lockdep_sys_exit;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+#else
+# define LOCKDEP_SYS_EXIT
+#endif
+
 #endif

-- cgit v0.10.2

From 10cd706d180b62a61aace5b440247c8785026ac1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: x86_64: connect the sysexit hook

Run the lockdep_sys_exit hook after all other C code on the syscall
return path.
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1d232e5..f1cacd4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -244,6 +244,7 @@ ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	flagmask */
 sysret_check:
+	LOCKDEP_SYS_EXIT
 	GET_THREAD_INFO(%rcx)
 	cli
 	TRACE_IRQS_OFF
@@ -333,6 +334,7 @@ int_ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	mask to check */
 int_with_check:
+	LOCKDEP_SYS_EXIT_IRQ
 	GET_THREAD_INFO(%rcx)
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
@@ -544,11 +546,13 @@ exit_intr:
 retint_with_reschedule:
 	movl $_TIF_WORK_MASK,%edi
 retint_check:
+	LOCKDEP_SYS_EXIT_IRQ
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
 	CFI_REMEMBER_STATE
 	jnz retint_careful
-retint_swapgs:
+
+retint_swapgs:	/* return to user-space */
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
@@ -557,7 +561,7 @@ retint_swapgs:
 	swapgs
 	jmp restore_args
 
-retint_restore_args:
+retint_restore_args:	/* return to kernel space */
 	cli
 	/*
 	 * The iretq could re-enable interrupts:
@@ -866,26 +870,21 @@ error_sti:
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)
 	call *%rax
-	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
-	movl %ebx,%eax
+	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+error_exit:
+	movl %ebx,%eax
 	RESTORE_REST
 	cli
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
 	testl %eax,%eax
 	jne retint_kernel
+	LOCKDEP_SYS_EXIT_IRQ
 	movl threadinfo_flags(%rcx),%edx
 	movl $_TIF_WORK_MASK,%edi
 	andl %edi,%edx
 	jnz retint_careful
-	/*
-	 * The iret might restore flags:
-	 */
-	TRACE_IRQS_IRETQ
-	swapgs
-	RESTORE_ARGS 0,8,0
-	jmp iret_label
+	jmp retint_swapgs
 	CFI_ENDPROC
 
 error_kernelspace:
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 55e586d..6ea73f3 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -50,6 +50,10 @@
 	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
 	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
 #endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	thunk lockdep_sys_exit_thunk,lockdep_sys_exit
+#endif
 
 	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h
index 86e70fe..5341ea1 100644
--- a/include/asm-x86/irqflags_64.h
+++ b/include/asm-x86/irqflags_64.h
@@ -137,6 +137,20 @@ static inline void halt(void)
 #  define TRACE_IRQS_ON
 #  define TRACE_IRQS_OFF
 # endif
+# ifdef CONFIG_DEBUG_LOCK_ALLOC
+#  define LOCKDEP_SYS_EXIT	call lockdep_sys_exit_thunk
+#  define LOCKDEP_SYS_EXIT_IRQ			\
+	TRACE_IRQS_ON;				\
+	sti;					\
+	SAVE_REST;				\
+	LOCKDEP_SYS_EXIT;			\
+	RESTORE_REST;				\
+	cli;					\
+	TRACE_IRQS_OFF;
+# else
+#  define LOCKDEP_SYS_EXIT
+#  define LOCKDEP_SYS_EXIT_IRQ
+# endif
 #endif
 
 #endif

-- cgit v0.10.2

From 523b44cff279c42c79f7bda709e2fefc30f20a59 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: s390: connect the sysexit hook

Run the lockdep_sys_exit hook before returning to user space.

Reviewed-by: Martin Schwidefsky
Signed-off-by: Heiko Carstens
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index f3bceb1..139ca15 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -68,9 +68,15 @@ STACK_SIZE = 1 << STACK_SHIFT
 	l	%r1,BASED(.Ltrace_irq_off)
 	basr	%r14,%r1
 	.endm
+
+	.macro	LOCKDEP_SYS_EXIT
+	l	%r1,BASED(.Llockdep_sys_exit)
+	basr	%r14,%r1
+	.endm
 #else
 #define TRACE_IRQS_ON
 #define TRACE_IRQS_OFF
+#define LOCKDEP_SYS_EXIT
 #endif
 
 /*
@@ -260,6 +266,7 @@ sysc_return:
 	bno	BASED(sysc_leave)
 	tm	__TI_flags+3(%r9),_TIF_WORK_SVC
 	bnz	BASED(sysc_work)	# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 sysc_leave:
 	RESTORE_ALL __LC_RETURN_PSW,1
 
@@ -283,6 +290,7 @@ sysc_work:
 	bo	BASED(sysc_restart)
 	tm	__TI_flags+3(%r9),_TIF_SINGLE_STEP
 	bo	BASED(sysc_singlestep)
+	LOCKDEP_SYS_EXIT
 	b	BASED(sysc_leave)
 
 #
@@ -572,6 +580,7 @@ io_return:
 #endif
 	tm	__TI_flags+3(%r9),_TIF_WORK_INT
 	bnz	BASED(io_work)		# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 io_leave:
 	RESTORE_ALL __LC_RETURN_PSW,0
 io_done:
@@ -618,6 +627,7 @@ io_work_loop:
 	bo	BASED(io_reschedule)
 	tm	__TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
 	bnz	BASED(io_sigpending)
+	LOCKDEP_SYS_EXIT
 	b	BASED(io_leave)
 
 #
@@ -1040,6 +1050,8 @@ cleanup_io_leave_insn:
 .Ltrace_irq_on: .long	trace_hardirqs_on
 .Ltrace_irq_off: .long	trace_hardirqs_off
+.Llockdep_sys_exit:
+	.long	lockdep_sys_exit
 #endif
 .Lcritical_start: .long	__critical_start + 0x80000000
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9c0d5cc..05e26d1 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -66,9 +66,14 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
 	.macro	TRACE_IRQS_OFF
 	brasl	%r14,trace_hardirqs_off
 	.endm
+
+	.macro	LOCKDEP_SYS_EXIT
+	brasl	%r14,lockdep_sys_exit
+	.endm
 #else
 #define TRACE_IRQS_ON
 #define TRACE_IRQS_OFF
+#define LOCKDEP_SYS_EXIT
 #endif
 
 	.macro	STORE_TIMER lc_offset
@@ -255,6 +260,7 @@ sysc_return:
 	jno	sysc_leave
 	tm	__TI_flags+7(%r9),_TIF_WORK_SVC
 	jnz	sysc_work	# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 sysc_leave:
 	RESTORE_ALL __LC_RETURN_PSW,1
 
@@ -278,6 +284,7 @@ sysc_work:
 	jo	sysc_restart
 	tm	__TI_flags+7(%r9),_TIF_SINGLE_STEP
 	jo	sysc_singlestep
+	LOCKDEP_SYS_EXIT
 	j	sysc_leave
 
 #
@@ -558,6 +565,7 @@ io_return:
 #endif
 	tm	__TI_flags+7(%r9),_TIF_WORK_INT
 	jnz	io_work		# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 io_leave:
 	RESTORE_ALL __LC_RETURN_PSW,0
 io_done:
@@ -605,6 +613,7 @@ io_work_loop:
 	jo	io_reschedule
 	tm	__TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
 	jnz	io_sigpending
+	LOCKDEP_SYS_EXIT
 	j	io_leave
 
 #

-- cgit v0.10.2

From 34a3d1e83708702ac6cb872215e68cd07dae298b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: annotate journal_start()

On Fri, 2007-07-13 at 02:05 -0700, Andrew Morton wrote:

> Except lockdep doesn't know about journal_start(), which has ranking
> requirements similar to a semaphore.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 772b653..8df5bac 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -233,6 +233,8 @@ out:
 	return ret;
 }
 
+static struct lock_class_key jbd_handle_key;
+
 /* Allocate a new handle.  This should probably be in a slab... */
 static handle_t *new_handle(int nblocks)
 {
@@ -243,6 +245,8 @@ static handle_t *new_handle(int nblocks)
 	handle->h_buffer_credits = nblocks;
 	handle->h_ref = 1;
 
+	lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0);
+
 	return handle;
 }
 
@@ -286,6 +290,9 @@ handle_t *journal_start(journal_t *journal, int nblocks)
 		current->journal_info = NULL;
 		handle = ERR_PTR(err);
 	}
+
+	lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+
 	return handle;
 }
 
@@ -1411,6 +1418,8 @@ int journal_stop(handle_t *handle)
 		spin_unlock(&journal->j_state_lock);
 	}
 
+	lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+
 	jbd_free_handle(handle);
 	return err;
 }
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 4527375..700a93b 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 #include
 #endif
 
@@ -396,6 +397,10 @@ struct handle_s
 	unsigned int	h_sync:		1;	/* sync-on-close */
 	unsigned int	h_jdata:	1;	/* force data journaling */
 	unsigned int	h_aborted:	1;	/* fatal error on handle */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	h_lockdep_map;
+#endif
 };

-- cgit v0.10.2

From 851a67b825540a8e00c0be3ee25e4627ba8b133b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: annotate rcu_read_{,un}lock{,_bh}

lockdep annotate rcu_read_{,un}lock{,_bh} in order to catch imbalanced
usage.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
Cc: Paul E. McKenney

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 9dc46db..f6279f6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -253,6 +253,13 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
 			     struct lock_class_key *key, int subclass);
 
 /*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), }
+
+/*
  * Reinitialize a lock key - for cases where there is special locking or
  * special initialization of locks so that the validator gets the scope
 * of dependencies wrong: they are either too broad (they need a class-split)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fe17d7d..76c1a53 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include
 
 /**
  * struct rcu_head - callback structure for use with RCU
@@ -133,6 +134,15 @@ static inline void rcu_bh_qsctr_inc(int cpu)
 extern int rcu_pending(int cpu);
 extern int rcu_needs_cpu(int cpu);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map rcu_lock_map;
+# define rcu_read_acquire()	lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_)
+# define rcu_read_release()	lock_release(&rcu_lock_map, 1, _THIS_IP_)
+#else
+# define rcu_read_acquire()	do { } while (0)
+# define rcu_read_release()	do { } while (0)
+#endif
+
 /**
  * rcu_read_lock - mark the beginning of an RCU read-side critical section.
 *
@@ -166,6 +176,7 @@ extern int rcu_needs_cpu(int cpu);
 	do { \
 		preempt_disable(); \
 		__acquire(RCU); \
+		rcu_read_acquire(); \
 	} while(0)
 
 /**
@@ -175,6 +186,7 @@ extern int rcu_needs_cpu(int cpu);
 */
 #define rcu_read_unlock() \
 	do { \
+		rcu_read_release(); \
 		__release(RCU); \
 		preempt_enable(); \
 	} while(0)
@@ -204,6 +216,7 @@ extern int rcu_needs_cpu(int cpu);
 	do { \
 		local_bh_disable(); \
 		__acquire(RCU_BH); \
+		rcu_read_acquire(); \
 	} while(0)
 
 /*
@@ -213,6 +226,7 @@ extern int rcu_needs_cpu(int cpu);
 */
 #define rcu_read_unlock_bh() \
 	do { \
+		rcu_read_release(); \
 		__release(RCU_BH); \
 		local_bh_enable(); \
 	} while(0)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2c2dd84..130214f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -49,6 +49,14 @@
 #include
 #include
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+#endif
+
 /* Definition for rcupdate control block. */
 static struct rcu_ctrlblk rcu_ctrlblk = {
 	.cur = -300,

-- cgit v0.10.2

From 58dfe883d3bc3b4c08c53a7f39e2ca3ec84f089e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:25:25 +0200
Subject: lockdep: annotate kprobes irq fiddling

kprobes disables irqs for jprobes, but does not tell lockdep about it.

This resolves this warning during an allyesconfig bzImage bootup test:

[ 423.670337] WARNING: at kernel/lockdep.c:2658 check_flags()
[ 423.670341]  [] show_trace_log_lvl+0x19/0x2e
[ 423.670348]  [] show_trace+0x12/0x14
[ 423.670350]  [] dump_stack+0x14/0x16
[ 423.670353]  [] check_flags+0x95/0x142
[ 423.670357]  [] lock_acquire+0x52/0xb8
[ 423.670360]  [] _spin_lock+0x2e/0x58
[ 423.670365]  [] jtcp_rcv_established+0x6e/0x189
[ 423.670369]  [] tcp_v4_do_rcv+0x30b/0x620
[ 423.670373]  [] tcp_v4_rcv+0x89d/0x8fa
[ 423.670376]  [] ip_local_deliver+0x17d/0x225
[ 423.670380]  [] ip_rcv+0x493/0x4ce
[ 423.670383]  [] netif_receive_skb+0x347/0x365
[ 423.670388]  [] nv_napi_poll+0x501/0x6c3
[ 423.670393]  [] net_rx_action+0xa3/0x1b6
[ 423.670396]  [] __do_softirq+0x76/0xfb
[ 423.670400]  [] do_softirq+0x75/0xf3

[ akpm: checkpatch.pl cleanups ]

Cc: Prasanna S Panchamukhi
Cc: Ananth N Mavinakayanahalli
Cc: Anil S Keshavamurthy
Cc: David S. Miller
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
Signed-off-by: Andrew Morton

diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index c2d03e9..e7d0d3c 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -557,6 +557,12 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	resume_execution(cur, regs, kcb);
 	regs->eflags |= kcb->kprobe_saved_eflags;
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+	if (raw_irqs_disabled_flags(regs->eflags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+#endif
 
 	/*Restore back the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -694,6 +700,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 			MIN_STACK_SIZE(addr));
 	regs->eflags &= ~IF_MASK;
+	trace_hardirqs_off();
 	regs->eip = (unsigned long)(jp->entry);
 	return 1;
 }
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index 1df17a0..62e28e5 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -544,6 +544,12 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	resume_execution(cur, regs, kcb);
 	regs->eflags |= kcb->kprobe_saved_rflags;
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+	if (raw_irqs_disabled_flags(regs->eflags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+#endif
 
 	/* Restore the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -684,6 +690,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 			MIN_STACK_SIZE(addr));
 	regs->eflags &= ~IF_MASK;
+	trace_hardirqs_off();
 	regs->rip = (unsigned long)(jp->entry);
 	return 1;
 }

-- cgit v0.10.2

From d475fd428ce77aa2a8bc650d230e17663a4f49c3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 15 Oct 2007 14:51:31 +0200
Subject: lockdep: per filesystem inode lock class

Give each filesystem its own inode lock class.

The various filesystems have different locking order wrt the inode
locks; esp. the pseudo filesystems differ from the rest.

Signed-off-by: Peter Zijlstra

diff --git a/fs/inode.c b/fs/inode.c
index 29f5068..bf6adf1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,6 +142,15 @@ static struct inode *alloc_inode(struct super_block *sb)
 			return NULL;
 		}
 
+		spin_lock_init(&inode->i_lock);
+		lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+
+		mutex_init(&inode->i_mutex);
+		lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
+
+		init_rwsem(&inode->i_alloc_sem);
+		lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+
 		mapping->a_ops = &empty_aops;
 		mapping->host = inode;
 		mapping->flags = 0;
@@ -190,8 +199,6 @@ void inode_init_once(struct inode *inode)
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
-	mutex_init(&inode->i_mutex);
-	init_rwsem(&inode->i_alloc_sem);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	rwlock_init(&inode->i_data.tree_lock);
 	spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -199,7 +206,6 @@ void inode_init_once(struct inode *inode)
 	spin_lock_init(&inode->i_data.private_lock);
 	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
-	spin_lock_init(&inode->i_lock);
 	i_size_ordered_init(inode);
 #ifdef CONFIG_INOTIFY
 	INIT_LIST_HEAD(&inode->inotify_watches);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 16421f6..0cad20e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1302,8 +1302,13 @@ struct file_system_type {
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
+	struct lock_class_key s_lock_key;
 	struct lock_class_key s_umount_key;
+
+	struct lock_class_key i_lock_key;
+	struct lock_class_key i_mutex_key;
+	struct lock_class_key i_alloc_sem_key;
 };
 
 extern int get_sb_bdev(struct file_system_type *fs_type,

-- cgit v0.10.2

From 14358e6ddaed27499d7d366b3e65c3e46b39e1c4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Sun, 14 Oct 2007 01:38:33 +0200
Subject: lockdep: annotate dir vs file i_mutex

On Mon, 2007-09-24 at 22:13 -0400, Steven Rostedt wrote:

> The circular lock seems to be this:
>
> #1:
>
>   sys_mmap2: down_write(&mm->mmap_sem);
>     nfs_revalidate_mapping: mutex_lock(&inode->i_mutex);
>
>
> #0:
>
>   vfs_readdir: mutex_lock(&inode->i_mutex);
>    - during the readdir (filldir64), we take a user fault (missing page?)
>      and call do_page_fault -
>   do_page_fault: down_read(&mm->mmap_sem);
>
>
> So it does indeed look like a circular locking. Now the question is, "is
> this a bug?". Looking like the inode of #1 must be a file or something
> else that you can mmap and the inode of #0 seems it must be a directory.
> I would say "no".
>
> Now if you can readdir on a file or mmap a directory, then this could be
> an issue.
>
> Otherwise, I'd love to see someone teach lockdep about this issue! ;-)

Make a distinction between file and dir usage of i_mutex.

The inode should be complete and unused at unlock_new_inode(), re-init
i_mutex depending on its type.

Signed-off-by: Peter Zijlstra

diff --git a/fs/inode.c b/fs/inode.c
index bf6adf1..f97de0a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -567,6 +567,18 @@ EXPORT_SYMBOL(new_inode);
 
 void unlock_new_inode(struct inode *inode)
 {
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct file_system_type *type = inode->i_sb->s_type;
+	/*
+	 * ensure nobody is actually holding i_mutex
+	 */
+	mutex_destroy(&inode->i_mutex);
+	mutex_init(&inode->i_mutex);
+	if (inode->i_mode & S_IFDIR)
+		lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+	else
+		lockdep_set_class(&inode->i_mutex, &type->i_mutex_key);
+#endif
 	/*
 	 * This is special!  We do not need the spinlock
 	 * when clearing I_LOCK, because we're guaranteed
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cad20e..6d760f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1308,6 +1308,7 @@ struct file_system_type {
 
 	struct lock_class_key i_lock_key;
 	struct lock_class_key i_mutex_key;
+	struct lock_class_key i_mutex_dir_key;
 	struct lock_class_key i_alloc_sem_key;
 };

-- cgit v0.10.2
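Both inode patches rely on the same re-keying idiom: every lock initialised
from the same init call site shares one lock class, so a static
struct lock_class_key plus lockdep_set_class() is what splits instances into
separate classes with independent dependency chains.  A minimal sketch of
that idiom outside the VFS (hypothetical example; struct example_dev and
example_dev_key are invented names, not part of the patches above):

	#include <linux/mutex.h>
	#include <linux/lockdep.h>

	struct example_dev {
		struct mutex lock;
	};

	/* one key per kind of object -> one lockdep class per kind */
	static struct lock_class_key example_dev_key;

	static void example_dev_init(struct example_dev *dev)
	{
		mutex_init(&dev->lock);
		lockdep_set_class(&dev->lock, &example_dev_key);
	}

The fs.h changes apply the same idea with the keys embedded in
struct file_system_type, so each filesystem type (and, after the second
patch, directories versus regular files within it) gets its own i_mutex
class.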