From 512e67f991c8886de75a65b854d7c19a55fb5b8a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:11 +0200
Subject: lockdep: maintainers

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/MAINTAINERS b/MAINTAINERS
index 012fa83..0f60fdb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2393,6 +2393,15 @@ M:	khali@linux-fr.org
 L:	lm-sensors@lm-sensors.org
 S:	Maintained
 
+LOCKDEP AND LOCKSTAT
+P:	Peter Zijlstra
+M:	peterz@infradead.org
+P:	Ingo Molnar
+M:	mingo@redhat.com
+L:	linux-kernel@vger.kernel.org
+T:	git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-lockdep.git
+S:	Maintained
+
 LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks)
 P:	Richard Russon (FlatCap)
 M:	ldm@flatcap.org

-- cgit v0.10.2

From 94c61c0aeffe64452e742087cf803d0347ef8418 Mon Sep 17 00:00:00 2001
From: Tim Pepper
Date: Thu, 11 Oct 2007 22:11:11 +0200
Subject: lockdep: Avoid /proc/lockdep & lock_stat infinite output

Both /proc/lockdep and /proc/lock_stat output may loop infinitely.

When a read() requests an amount of data smaller than the amount of data
that the seq_file's foo_show() outputs, the output starts looping and
outputs the "stuck" element's data infinitely.  There may be multiple
sequential calls to foo_start(), foo_next()/foo_show(), and foo_stop()
for a single open with sequential read of the file.  The _start() does
not have to start with the 0th element and _show() might be called
multiple times in a row for the same element for a given open/read of
the seq_file.

Also, header output should not be happening in _start(); all output
should be in _show(), which is what SEQ_START_TOKEN is meant to enable.
Having output in _start() may also negatively impact seq_file's
seq_read() and traverse() accounting.

Signed-off-by: Tim Pepper
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
Cc: Ingo Molnar
Cc: Al Viro

diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index c851b2d..8a135bd 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -25,28 +25,38 @@ static void *l_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct lock_class *class = v;
+	struct lock_class *class;
 
 	(*pos)++;
 
-	if (class->lock_entry.next != &all_lock_classes)
-		class = list_entry(class->lock_entry.next, struct lock_class,
-				   lock_entry);
-	else
-		class = NULL;
-	m->private = class;
+	if (v == SEQ_START_TOKEN)
+		class = m->private;
+	else {
+		class = v;
+
+		if (class->lock_entry.next != &all_lock_classes)
+			class = list_entry(class->lock_entry.next,
+					   struct lock_class, lock_entry);
+		else
+			class = NULL;
+	}
 
 	return class;
 }
 
 static void *l_start(struct seq_file *m, loff_t *pos)
 {
-	struct lock_class *class = m->private;
+	struct lock_class *class;
+	loff_t i = 0;
 
-	if (&class->lock_entry == all_lock_classes.next)
-		seq_printf(m, "all lock classes:\n");
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
 
-	return class;
+	list_for_each_entry(class, &all_lock_classes, lock_entry) {
+		if (++i == *pos)
+			return class;
+	}
+	return NULL;
 }
 
 static void l_stop(struct seq_file *m, void *v)
@@ -101,10 +111,15 @@ static void print_name(struct seq_file *m, struct lock_class *class)
 static int l_show(struct seq_file *m, void *v)
 {
 	unsigned long nr_forward_deps, nr_backward_deps;
-	struct lock_class *class = m->private;
+	struct lock_class *class = v;
 	struct lock_list *entry;
 	char c1, c2, c3, c4;
 
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(m, "all lock classes:\n");
+		return 0;
+	}
+
 	seq_printf(m, "%p", class->key);
 #ifdef CONFIG_DEBUG_LOCKDEP
 	seq_printf(m, " OPS:%8ld", class->ops);
@@ -523,10 +538,11 @@ static void *ls_start(struct seq_file *m, loff_t *pos)
 {
 	struct lock_stat_seq *data = m->private;
 
-	if (data->iter == data->stats)
-		seq_header(m);
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
 
-	if (data->iter == data->iter_end)
+	data->iter = data->stats + *pos;
+	if (data->iter >= data->iter_end)
 		data->iter = NULL;
 
 	return data->iter;
@@ -538,8 +554,13 @@ static void *ls_next(struct seq_file *m, void *v, loff_t *pos)
 
 	(*pos)++;
 
-	data->iter = v;
-	data->iter++;
+	if (v == SEQ_START_TOKEN)
+		data->iter = data->stats;
+	else {
+		data->iter = v;
+		data->iter++;
+	}
+
 	if (data->iter == data->iter_end)
 		data->iter = NULL;
 
@@ -552,9 +573,11 @@ static void ls_stop(struct seq_file *m, void *v)
 
 static int ls_show(struct seq_file *m, void *v)
 {
-	struct lock_stat_seq *data = m->private;
+	if (v == SEQ_START_TOKEN)
+		seq_header(m);
+	else
+		seq_stats(m, v);
 
-	seq_stats(m, data->iter);
 	return 0;
 }

-- cgit v0.10.2

From 3aa416b07f0adf01c090baab26fb70c35ec17623 Mon Sep 17 00:00:00 2001
From: Gregory Haskins
Date: Thu, 11 Oct 2007 22:11:11 +0200
Subject: lockdep: fix mismatched lockdep_depth/curr_chain_hash

It is possible for the current->curr_chain_key to become inconsistent
with the current index if the chain fails to validate.  The end result
is that future lock_acquire() operations may inadvertently fail to find
a hit in the cache, resulting in a new node being added to the graph
for every acquire.

Signed-off-by: Gregory Haskins
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 734da57..42ae4a5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1521,7 +1521,7 @@ cache_hit:
 }
 
 static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
-		struct held_lock *hlock, int chain_head)
+		struct held_lock *hlock, int chain_head, u64 chain_key)
 {
 	/*
 	 * Trylock needs to maintain the stack of held locks, but it
@@ -1534,7 +1534,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 	 * graph_lock for us)
 	 */
 	if (!hlock->trylock && (hlock->check == 2) &&
-			lookup_chain_cache(curr->curr_chain_key, hlock->class)) {
+			lookup_chain_cache(chain_key, hlock->class)) {
 		/*
 		 * Check whether last held lock:
 		 *
@@ -1576,7 +1576,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock,
 #else
 static inline int validate_chain(struct task_struct *curr,
 		struct lockdep_map *lock, struct held_lock *hlock,
-		int chain_head)
+		int chain_head, u64 chain_key)
 {
 	return 1;
 }
@@ -2450,11 +2450,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		chain_head = 1;
 	}
 	chain_key = iterate_chain_key(chain_key, id);
-	curr->curr_chain_key = chain_key;
 
-	if (!validate_chain(curr, lock, hlock, chain_head))
+	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
 		return 0;
 
+	curr->curr_chain_key = chain_key;
 	curr->lockdep_depth++;
 	check_chain_key(curr);
 #ifdef CONFIG_DEBUG_LOCKDEP

-- cgit v0.10.2

From e4564f79d4b6923da7360df4b24a48cc2d4160de Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: fixup mutex annotations

The fancy mutex_lock fastpath has too many indirections to track the
caller, hence all contentions are perceived to come from mutex_lock().

Avoid this by explicitly not using the fastpath code (it was disabled
already anyway).
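To make the effect concrete, here is a small illustrative user-space sketch
(not from this patch set; lock_entry(), lock_common() and lock_report() are
invented names): contention can only be attributed to the real call site when
the outermost entry point captures the caller's address and hands it down to
the shared slowpath, which is what defining mutex_lock() as
mutex_lock_nested(lock, 0) and passing _RET_IP_ into __mutex_lock_common()
achieves.

	#include <stdio.h>

	/* stand-in for lock_contended(): lock_stat aggregates events by this address */
	static void lock_report(const void *ip)
	{
		printf("contention attributed to caller at %p\n", ip);
	}

	/* shared slowpath: must use the ip handed down by its caller ... */
	static void lock_common(const void *ip)
	{
		lock_report(ip);
	}

	/* ... because capturing it here would always point at lock_entry() itself */
	__attribute__((noinline)) static void lock_entry(void)
	{
		lock_common(__builtin_return_address(0));
	}

	int main(void)
	{
		lock_entry();	/* the report names this call site, not lock_entry() */
		return 0;
	}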
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 0d50ea3..6a735c7 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -120,14 +120,17 @@ static inline int fastcall mutex_is_locked(struct mutex *lock)
  * See kernel/mutex.c for detailed documentation of these APIs.
  * Also see Documentation/mutex-design.txt.
  */
-extern void fastcall mutex_lock(struct mutex *lock);
-extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
 extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
 					unsigned int subclass);
+
+#define mutex_lock(lock) mutex_lock_nested(lock, 0)
+#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0)
 #else
+extern void fastcall mutex_lock(struct mutex *lock);
+extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
+
 # define mutex_lock_nested(lock, subclass) mutex_lock(lock)
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 #endif
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 691b865..d7fe50c 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -51,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 
 EXPORT_SYMBOL(__mutex_init);
 
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * We split the mutex lock/unlock logic into separate fastpath and
  * slowpath functions, to reduce the register pressure on the fastpath.
@@ -92,6 +93,7 @@ void inline fastcall __sched mutex_lock(struct mutex *lock)
 }
 
 EXPORT_SYMBOL(mutex_lock);
+#endif
 
 static void fastcall noinline __sched
 __mutex_unlock_slowpath(atomic_t *lock_count);
@@ -122,7 +124,8 @@ EXPORT_SYMBOL(mutex_unlock);
  * Lock a mutex (possibly interruptible), slowpath:
  */
 static inline int __sched
-__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
+__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
+		unsigned long ip)
 {
 	struct task_struct *task = current;
 	struct mutex_waiter waiter;
@@ -132,7 +135,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 	spin_lock_mutex(&lock->wait_lock, flags);
 
 	debug_mutex_lock_common(lock, &waiter);
-	mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+	mutex_acquire(&lock->dep_map, subclass, 0, ip);
 	debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
 
 	/* add waiting tasks to the end of the waitqueue (FIFO): */
@@ -143,7 +146,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 	if (old_val == 1)
 		goto done;
 
-	lock_contended(&lock->dep_map, _RET_IP_);
+	lock_contended(&lock->dep_map, ip);
 
 	for (;;) {
 		/*
@@ -166,7 +169,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
 		if (unlikely(state == TASK_INTERRUPTIBLE &&
 					signal_pending(task))) {
 			mutex_remove_waiter(lock, &waiter, task_thread_info(task));
-			mutex_release(&lock->dep_map, 1, _RET_IP_);
+			mutex_release(&lock->dep_map, 1, ip);
 			spin_unlock_mutex(&lock->wait_lock, flags);
 
 			debug_mutex_free_waiter(&waiter);
@@ -197,20 +200,12 @@ done:
 	return 0;
 }
 
-static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count)
-{
-	struct mutex *lock = container_of(lock_count, struct mutex, count);
-
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
-}
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void __sched
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -219,7 +214,7 @@ int __sched
 mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
 {
 	might_sleep();
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_);
 }
 
 EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
@@ -271,6 +266,7 @@ __mutex_unlock_slowpath(atomic_t *lock_count)
 	__mutex_unlock_common_slowpath(lock_count, 1);
 }
 
+#ifndef CONFIG_DEBUG_LOCK_ALLOC
 /*
  * Here come the less common (and hence less performance-critical) APIs:
  * mutex_lock_interruptible() and mutex_trylock().
@@ -298,13 +294,22 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
 
 EXPORT_SYMBOL(mutex_lock_interruptible);
 
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count)
+{
+	struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_);
+}
+
 static int fastcall noinline __sched
 __mutex_lock_interruptible_slowpath(atomic_t *lock_count)
 {
 	struct mutex *lock = container_of(lock_count, struct mutex, count);
 
-	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_);
 }
+#endif
 
 /*
  * Spinlock based trylock, we take the spinlock and check whether we

-- cgit v0.10.2

From b351d164e860d1ffffdc501c32f55dd1446c385b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: syscall exit check

Provide a check to validate that we do not hold any locks when
switching back to user-space.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 0e843bf..9dc46db 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -238,6 +238,7 @@ extern void lockdep_info(void);
 extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
+extern void lockdep_sys_exit(void);
 
 extern void lockdep_off(void);
 extern void lockdep_on(void);
@@ -317,6 +318,7 @@ static inline void lockdep_on(void)
 # define INIT_LOCKDEP
 # define lockdep_reset()		do { debug_locks = 1; } while (0)
 # define lockdep_free_key_range(start, size)	do { } while (0)
+# define lockdep_sys_exit()		do { } while (0)
 /*
  * The class key takes no space if lockdep is disabled:
  */
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 42ae4a5..a6f1ee9 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3199,3 +3199,19 @@ void debug_show_held_locks(struct task_struct *task)
 }
 
 EXPORT_SYMBOL_GPL(debug_show_held_locks);
+
+void lockdep_sys_exit(void)
+{
+	struct task_struct *curr = current;
+
+	if (unlikely(curr->lockdep_depth)) {
+		if (!debug_locks_off())
+			return;
+		printk("\n================================================\n");
+		printk(  "[ BUG: lock held when returning to user space! ]\n");
+		printk(  "------------------------------------------------\n");
+		printk("%s/%d is leaving the kernel with locks still held!\n",
+				curr->comm, curr->pid);
+		lockdep_print_held_locks(curr);
+	}
+}

-- cgit v0.10.2

From c7e872e7da5514d014707a407ea562d197cc0136 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: i386: connect the sysexit hook

Run the lockdep_sys_exit hook after all other C code on the syscall
return path.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 290b7bc..8099fea0 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -251,6 +251,7 @@ check_userspace:
 	jb resume_kernel		# not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -338,6 +339,7 @@ sysenter_past_esp:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
@@ -377,6 +379,7 @@ syscall_call:
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)		# store the return value
 syscall_exit:
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -467,6 +470,7 @@ work_pending:
 	jz work_notifysig
 work_resched:
 	call schedule
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
diff --git a/include/asm-x86/irqflags_32.h b/include/asm-x86/irqflags_32.h
index eff8585..d058b04 100644
--- a/include/asm-x86/irqflags_32.h
+++ b/include/asm-x86/irqflags_32.h
@@ -160,4 +160,17 @@ static inline int raw_irqs_disabled(void)
 # define TRACE_IRQS_OFF
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LOCKDEP_SYS_EXIT			\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call lockdep_sys_exit;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+#else
+# define LOCKDEP_SYS_EXIT
+#endif
+
 #endif

-- cgit v0.10.2

From 10cd706d180b62a61aace5b440247c8785026ac1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: x86_64: connect the sysexit hook

Run the lockdep_sys_exit hook after all other C code on the syscall
return path.
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1d232e5..f1cacd4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -244,6 +244,7 @@ ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	flagmask */
 sysret_check:
+	LOCKDEP_SYS_EXIT
 	GET_THREAD_INFO(%rcx)
 	cli
 	TRACE_IRQS_OFF
@@ -333,6 +334,7 @@ int_ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	mask to check */
 int_with_check:
+	LOCKDEP_SYS_EXIT_IRQ
 	GET_THREAD_INFO(%rcx)
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
@@ -544,11 +546,13 @@ exit_intr:
 retint_with_reschedule:
 	movl $_TIF_WORK_MASK,%edi
 retint_check:
+	LOCKDEP_SYS_EXIT_IRQ
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
 	CFI_REMEMBER_STATE
 	jnz retint_careful
-retint_swapgs:
+
+retint_swapgs:	/* return to user-space */
 	/*
 	 * The iretq could re-enable interrupts:
 	 */
@@ -557,7 +561,7 @@ retint_swapgs:
 	swapgs
 	jmp restore_args
 
-retint_restore_args:
+retint_restore_args:	/* return to kernel space */
 	cli
 	/*
 	 * The iretq could re-enable interrupts:
@@ -866,26 +870,21 @@ error_sti:
 	movq ORIG_RAX(%rsp),%rsi	/* get error code */
 	movq $-1,ORIG_RAX(%rsp)
 	call *%rax
-	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
-error_exit:
-	movl %ebx,%eax
+	/* ebx:	no swapgs flag (1: don't need swapgs, 0: need it) */
+error_exit:
+	movl %ebx,%eax
 	RESTORE_REST
 	cli
 	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)
 	testl %eax,%eax
 	jne retint_kernel
+	LOCKDEP_SYS_EXIT_IRQ
 	movl threadinfo_flags(%rcx),%edx
 	movl $_TIF_WORK_MASK,%edi
 	andl %edi,%edx
 	jnz retint_careful
-	/*
-	 * The iret might restore flags:
-	 */
-	TRACE_IRQS_IRETQ
-	swapgs
-	RESTORE_ARGS 0,8,0
-	jmp iret_label
+	jmp retint_swapgs
 	CFI_ENDPROC
 
 error_kernelspace:
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 55e586d..6ea73f3 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -50,6 +50,10 @@
 	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
 	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
 #endif
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	thunk lockdep_sys_exit_thunk,lockdep_sys_exit
+#endif
 
 	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
diff --git a/include/asm-x86/irqflags_64.h b/include/asm-x86/irqflags_64.h
index 86e70fe..5341ea1 100644
--- a/include/asm-x86/irqflags_64.h
+++ b/include/asm-x86/irqflags_64.h
@@ -137,6 +137,20 @@ static inline void halt(void)
 #  define TRACE_IRQS_ON
 #  define TRACE_IRQS_OFF
 # endif
+# ifdef CONFIG_DEBUG_LOCK_ALLOC
+#  define LOCKDEP_SYS_EXIT	call lockdep_sys_exit_thunk
+#  define LOCKDEP_SYS_EXIT_IRQ			\
+	TRACE_IRQS_ON;				\
+	sti;					\
+	SAVE_REST;				\
+	LOCKDEP_SYS_EXIT;			\
+	RESTORE_REST;				\
+	cli;					\
+	TRACE_IRQS_OFF;
+# else
+#  define LOCKDEP_SYS_EXIT
+#  define LOCKDEP_SYS_EXIT_IRQ
+# endif
 #endif
 
 #endif

-- cgit v0.10.2

From 523b44cff279c42c79f7bda709e2fefc30f20a59 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: s390: connect the sysexit hook

Run the lockdep_sys_exit hook before returning to user space.

Reviewed-by: Martin Schwidefsky
Signed-off-by: Heiko Carstens
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index f3bceb1..139ca15 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -68,9 +68,15 @@ STACK_SIZE = 1 << STACK_SHIFT
 	l	%r1,BASED(.Ltrace_irq_off)
 	basr	%r14,%r1
 	.endm
+
+	.macro	LOCKDEP_SYS_EXIT
+	l	%r1,BASED(.Llockdep_sys_exit)
+	basr	%r14,%r1
+	.endm
 #else
 #define TRACE_IRQS_ON
 #define TRACE_IRQS_OFF
+#define LOCKDEP_SYS_EXIT
 #endif
 
 /*
@@ -260,6 +266,7 @@ sysc_return:
 	bno	BASED(sysc_leave)
 	tm	__TI_flags+3(%r9),_TIF_WORK_SVC
 	bnz	BASED(sysc_work)	# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 sysc_leave:
 	RESTORE_ALL __LC_RETURN_PSW,1
 
@@ -283,6 +290,7 @@ sysc_work:
 	bo	BASED(sysc_restart)
 	tm	__TI_flags+3(%r9),_TIF_SINGLE_STEP
 	bo	BASED(sysc_singlestep)
+	LOCKDEP_SYS_EXIT
 	b	BASED(sysc_leave)
 
 #
@@ -572,6 +580,7 @@ io_return:
 #endif
 	tm	__TI_flags+3(%r9),_TIF_WORK_INT
 	bnz	BASED(io_work)		# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 io_leave:
 	RESTORE_ALL __LC_RETURN_PSW,0
 io_done:
@@ -618,6 +627,7 @@ io_work_loop:
 	bo	BASED(io_reschedule)
 	tm	__TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
 	bnz	BASED(io_sigpending)
+	LOCKDEP_SYS_EXIT
 	b	BASED(io_leave)
 
 #
@@ -1040,6 +1050,8 @@ cleanup_io_leave_insn:
 .Ltrace_irq_on: .long	trace_hardirqs_on
 .Ltrace_irq_off: .long	trace_hardirqs_off
+.Llockdep_sys_exit:
+	.long	lockdep_sys_exit
 #endif
 .Lcritical_start: .long	__critical_start + 0x80000000
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9c0d5cc..05e26d1 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -66,9 +66,14 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \
 	.macro	TRACE_IRQS_OFF
 	brasl	%r14,trace_hardirqs_off
 	.endm
+
+	.macro	LOCKDEP_SYS_EXIT
+	brasl	%r14,lockdep_sys_exit
+	.endm
 #else
 #define TRACE_IRQS_ON
 #define TRACE_IRQS_OFF
+#define LOCKDEP_SYS_EXIT
 #endif
 
 	.macro	STORE_TIMER lc_offset
@@ -255,6 +260,7 @@ sysc_return:
 	jno	sysc_leave
 	tm	__TI_flags+7(%r9),_TIF_WORK_SVC
 	jnz	sysc_work	# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 sysc_leave:
 	RESTORE_ALL __LC_RETURN_PSW,1
 
@@ -278,6 +284,7 @@ sysc_work:
 	jo	sysc_restart
 	tm	__TI_flags+7(%r9),_TIF_SINGLE_STEP
 	jo	sysc_singlestep
+	LOCKDEP_SYS_EXIT
 	j	sysc_leave
 
 #
@@ -558,6 +565,7 @@ io_return:
 #endif
 	tm	__TI_flags+7(%r9),_TIF_WORK_INT
 	jnz	io_work		# there is work to do (signals etc.)
+	LOCKDEP_SYS_EXIT
 io_leave:
 	RESTORE_ALL __LC_RETURN_PSW,0
 io_done:
@@ -605,6 +613,7 @@ io_work_loop:
 	jo	io_reschedule
 	tm	__TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)
 	jnz	io_sigpending
+	LOCKDEP_SYS_EXIT
 	j	io_leave
 
 #

-- cgit v0.10.2

From 34a3d1e83708702ac6cb872215e68cd07dae298b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: annotate journal_start()

On Fri, 2007-07-13 at 02:05 -0700, Andrew Morton wrote:

> Except lockdep doesn't know about journal_start(), which has ranking
> requirements similar to a semaphore.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar

diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 772b653..8df5bac 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -233,6 +233,8 @@ out:
 	return ret;
 }
 
+static struct lock_class_key jbd_handle_key;
+
 /* Allocate a new handle.  This should probably be in a slab... */
 static handle_t *new_handle(int nblocks)
 {
@@ -243,6 +245,8 @@ static handle_t *new_handle(int nblocks)
 	handle->h_buffer_credits = nblocks;
 	handle->h_ref = 1;
 
+	lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0);
+
 	return handle;
 }
 
@@ -286,6 +290,9 @@ handle_t *journal_start(journal_t *journal, int nblocks)
 		current->journal_info = NULL;
 		handle = ERR_PTR(err);
 	}
+
+	lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
+
 	return handle;
 }
 
@@ -1411,6 +1418,8 @@ int journal_stop(handle_t *handle)
 		spin_unlock(&journal->j_state_lock);
 	}
 
+	lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
+
 	jbd_free_handle(handle);
 	return err;
 }
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 4527375..700a93b 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include
 #include
 #endif
 
@@ -396,6 +397,10 @@ struct handle_s
 	unsigned int	h_sync:		1;	/* sync-on-close */
 	unsigned int	h_jdata:	1;	/* force data journaling */
 	unsigned int	h_aborted:	1;	/* fatal error on handle */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map	h_lockdep_map;
+#endif
 };

-- cgit v0.10.2

From 851a67b825540a8e00c0be3ee25e4627ba8b133b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:11:12 +0200
Subject: lockdep: annotate rcu_read_{,un}lock{,_bh}

lockdep annotate rcu_read_{,un}lock{,_bh} in order to catch imbalanced
usage.

Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
Cc: Paul E. McKenney

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 9dc46db..f6279f6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -253,6 +253,13 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
 			     struct lock_class_key *key, int subclass);
 
 /*
+ * To initialize a lockdep_map statically use this macro.
+ * Note that _name must not be NULL.
+ */
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+	{ .name = (_name), .key = (void *)(_key), }
+
+/*
  * Reinitialize a lock key - for cases where there is special locking or
  * special initialization of locks so that the validator gets the scope
 * of dependencies wrong: they are either too broad (they need a class-split)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fe17d7d..76c1a53 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include
 
 /**
  * struct rcu_head - callback structure for use with RCU
@@ -133,6 +134,15 @@ static inline void rcu_bh_qsctr_inc(int cpu)
 extern int rcu_pending(int cpu);
 extern int rcu_needs_cpu(int cpu);
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+extern struct lockdep_map rcu_lock_map;
+# define rcu_read_acquire()	lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_)
+# define rcu_read_release()	lock_release(&rcu_lock_map, 1, _THIS_IP_)
+#else
+# define rcu_read_acquire()	do { } while (0)
+# define rcu_read_release()	do { } while (0)
+#endif
+
 /**
  * rcu_read_lock - mark the beginning of an RCU read-side critical section.
 *
@@ -166,6 +176,7 @@ extern int rcu_needs_cpu(int cpu);
 	do { \
 		preempt_disable(); \
 		__acquire(RCU); \
+		rcu_read_acquire(); \
 	} while(0)
 
 /**
@@ -175,6 +186,7 @@ extern int rcu_needs_cpu(int cpu);
 */
 #define rcu_read_unlock() \
 	do { \
+		rcu_read_release(); \
 		__release(RCU); \
 		preempt_enable(); \
 	} while(0)
@@ -204,6 +216,7 @@ extern int rcu_needs_cpu(int cpu);
 	do { \
 		local_bh_disable(); \
 		__acquire(RCU_BH); \
+		rcu_read_acquire(); \
 	} while(0)
 
 /*
@@ -213,6 +226,7 @@ extern int rcu_needs_cpu(int cpu);
 */
 #define rcu_read_unlock_bh() \
 	do { \
+		rcu_read_release(); \
 		__release(RCU_BH); \
 		local_bh_enable(); \
 	} while(0)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2c2dd84..130214f 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -49,6 +49,14 @@
 #include
 #include
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static struct lock_class_key rcu_lock_key;
+struct lockdep_map rcu_lock_map =
+	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
+
+EXPORT_SYMBOL_GPL(rcu_lock_map);
+#endif
+
 /* Definition for rcupdate control block. */
 static struct rcu_ctrlblk rcu_ctrlblk = {
 	.cur = -300,

-- cgit v0.10.2

From 58dfe883d3bc3b4c08c53a7f39e2ca3ec84f089e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 11 Oct 2007 22:25:25 +0200
Subject: lockdep: annotate kprobes irq fiddling

kprobes disables irqs for jprobes, but does not tell lockdep about it.

This resolves this warning during an allyesconfig bzImage bootup test:

[ 423.670337] WARNING: at kernel/lockdep.c:2658 check_flags()
[ 423.670341]  [] show_trace_log_lvl+0x19/0x2e
[ 423.670348]  [] show_trace+0x12/0x14
[ 423.670350]  [] dump_stack+0x14/0x16
[ 423.670353]  [] check_flags+0x95/0x142
[ 423.670357]  [] lock_acquire+0x52/0xb8
[ 423.670360]  [] _spin_lock+0x2e/0x58
[ 423.670365]  [] jtcp_rcv_established+0x6e/0x189
[ 423.670369]  [] tcp_v4_do_rcv+0x30b/0x620
[ 423.670373]  [] tcp_v4_rcv+0x89d/0x8fa
[ 423.670376]  [] ip_local_deliver+0x17d/0x225
[ 423.670380]  [] ip_rcv+0x493/0x4ce
[ 423.670383]  [] netif_receive_skb+0x347/0x365
[ 423.670388]  [] nv_napi_poll+0x501/0x6c3
[ 423.670393]  [] net_rx_action+0xa3/0x1b6
[ 423.670396]  [] __do_softirq+0x76/0xfb
[ 423.670400]  [] do_softirq+0x75/0xf3

[ akpm: checkpatch.pl cleanups ]

Cc: Prasanna S Panchamukhi
Cc: Ananth N Mavinakayanahalli
Cc: Anil S Keshavamurthy
Cc: David S. Miller
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
Signed-off-by: Andrew Morton

diff --git a/arch/x86/kernel/kprobes_32.c b/arch/x86/kernel/kprobes_32.c
index c2d03e9..e7d0d3c 100644
--- a/arch/x86/kernel/kprobes_32.c
+++ b/arch/x86/kernel/kprobes_32.c
@@ -557,6 +557,12 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	resume_execution(cur, regs, kcb);
 	regs->eflags |= kcb->kprobe_saved_eflags;
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+	if (raw_irqs_disabled_flags(regs->eflags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+#endif
 
 	/*Restore back the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -694,6 +700,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 			MIN_STACK_SIZE(addr));
 	regs->eflags &= ~IF_MASK;
+	trace_hardirqs_off();
 	regs->eip = (unsigned long)(jp->entry);
 	return 1;
 }
diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c
index 1df17a0..62e28e5 100644
--- a/arch/x86/kernel/kprobes_64.c
+++ b/arch/x86/kernel/kprobes_64.c
@@ -544,6 +544,12 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs)
 	resume_execution(cur, regs, kcb);
 	regs->eflags |= kcb->kprobe_saved_rflags;
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+	if (raw_irqs_disabled_flags(regs->eflags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+#endif
 
 	/* Restore the original saved kprobes variables and continue. */
 	if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -684,6 +690,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
 			MIN_STACK_SIZE(addr));
 	regs->eflags &= ~IF_MASK;
+	trace_hardirqs_off();
 	regs->rip = (unsigned long)(jp->entry);
 	return 1;
 }

-- cgit v0.10.2

From d475fd428ce77aa2a8bc650d230e17663a4f49c3 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 15 Oct 2007 14:51:31 +0200
Subject: lockdep: per filesystem inode lock class

Give each filesystem its own inode lock class.

The various filesystems have different locking order wrt the inode
locks; esp. the pseudo filesystems differ from the rest.

Signed-off-by: Peter Zijlstra

diff --git a/fs/inode.c b/fs/inode.c
index 29f5068..bf6adf1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,6 +142,15 @@ static struct inode *alloc_inode(struct super_block *sb)
 			return NULL;
 		}
 
+		spin_lock_init(&inode->i_lock);
+		lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key);
+
+		mutex_init(&inode->i_mutex);
+		lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
+
+		init_rwsem(&inode->i_alloc_sem);
+		lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
+
 		mapping->a_ops = &empty_aops;
 		mapping->host = inode;
 		mapping->flags = 0;
@@ -190,8 +199,6 @@ void inode_init_once(struct inode *inode)
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
-	mutex_init(&inode->i_mutex);
-	init_rwsem(&inode->i_alloc_sem);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	rwlock_init(&inode->i_data.tree_lock);
 	spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -199,7 +206,6 @@ void inode_init_once(struct inode *inode)
 	spin_lock_init(&inode->i_data.private_lock);
 	INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
-	spin_lock_init(&inode->i_lock);
 	i_size_ordered_init(inode);
 #ifdef CONFIG_INOTIFY
 	INIT_LIST_HEAD(&inode->inotify_watches);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 16421f6..0cad20e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1302,8 +1302,13 @@ struct file_system_type {
 	struct module *owner;
 	struct file_system_type * next;
 	struct list_head fs_supers;
+	struct lock_class_key s_lock_key;
 	struct lock_class_key s_umount_key;
+
+	struct lock_class_key i_lock_key;
+	struct lock_class_key i_mutex_key;
+	struct lock_class_key i_alloc_sem_key;
 };
 
 extern int get_sb_bdev(struct file_system_type *fs_type,

-- cgit v0.10.2

From 14358e6ddaed27499d7d366b3e65c3e46b39e1c4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Sun, 14 Oct 2007 01:38:33 +0200
Subject: lockdep: annotate dir vs file i_mutex

On Mon, 2007-09-24 at 22:13 -0400, Steven Rostedt wrote:

> The circular lock seems to be this:
>
> #1:
>
>   sys_mmap2: down_write(&mm->mmap_sem);
>     nfs_revalidate_mapping: mutex_lock(&inode->i_mutex);
>
>
> #0:
>
>   vfs_readdir: mutex_lock(&inode->i_mutex);
>    - during the readdir (filldir64), we take a user fault (missing page?)
>      and call do_page_fault -
>   do_page_fault: down_read(&mm->mmap_sem);
>
>
> So it does indeed look like a circular locking. Now the question is, "is
> this a bug?". Looking like the inode of #1 must be a file or something
> else that you can mmap and the inode of #0 seems it must be a directory.
> I would say "no".
>
> Now if you can readdir on a file or mmap a directory, then this could be
> an issue.
>
> Otherwise, I'd love to see someone teach lockdep about this issue! ;-)

Make a distinction between file and dir usage of i_mutex.

The inode should be complete and unused at unlock_new_inode(), re-init
i_mutex depending on its type.

Signed-off-by: Peter Zijlstra

diff --git a/fs/inode.c b/fs/inode.c
index bf6adf1..f97de0a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -567,6 +567,18 @@ EXPORT_SYMBOL(new_inode);
 
 void unlock_new_inode(struct inode *inode)
 {
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct file_system_type *type = inode->i_sb->s_type;
+	/*
+	 * ensure nobody is actually holding i_mutex
+	 */
+	mutex_destroy(&inode->i_mutex);
+	mutex_init(&inode->i_mutex);
+	if (inode->i_mode & S_IFDIR)
+		lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key);
+	else
+		lockdep_set_class(&inode->i_mutex, &type->i_mutex_key);
+#endif
 	/*
 	 * This is special!  We do not need the spinlock
 	 * when clearing I_LOCK, because we're guaranteed
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0cad20e..6d760f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1308,6 +1308,7 @@ struct file_system_type {
 
 	struct lock_class_key i_lock_key;
 	struct lock_class_key i_mutex_key;
+	struct lock_class_key i_mutex_dir_key;
 	struct lock_class_key i_alloc_sem_key;
 };

-- cgit v0.10.2
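Both inode patches rely on the same re-keying idiom: every lock initialised
from the same init call site shares one lock class, so a static
struct lock_class_key plus lockdep_set_class() is what splits instances into
separate classes with independent dependency chains.  A minimal sketch of
that idiom outside the VFS (hypothetical example; struct example_dev and
example_dev_key are invented names, not part of the patches above):

	#include <linux/mutex.h>
	#include <linux/lockdep.h>

	struct example_dev {
		struct mutex lock;
	};

	/* one key per kind of object -> one lockdep class per kind */
	static struct lock_class_key example_dev_key;

	static void example_dev_init(struct example_dev *dev)
	{
		mutex_init(&dev->lock);
		lockdep_set_class(&dev->lock, &example_dev_key);
	}

The fs.h changes apply the same idea with the keys embedded in
struct file_system_type, so each filesystem type (and, after the second
patch, directories versus regular files within it) gets its own i_mutex
class.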