From 50fb4f7fc907efff65eadb0b74387a9ffed6e849 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:22 -0800
Subject: freezer: fix current->state restoration race in refrigerator()

refrigerator() saves current->state before entering frozen state and
restores it before returning using __set_current_state(); however,
this is racy, for example, please consider the following sequence.

	set_current_state(TASK_INTERRUPTIBLE);
	try_to_freeze();
	if (kthread_should_stop())
		break;
	schedule();

If kthread_stop() races with ->state restoration, the restoration can
restore ->state to TASK_INTERRUPTIBLE after kthread_stop() sets it to
TASK_RUNNING but kthread_should_stop() may still see zero
->should_stop because there's no memory barrier between restoring
TASK_INTERRUPTIBLE and kthread_should_stop() test.

This isn't restricted to kthread_should_stop().  current->state is
often used in memory barrier based synchronization and silently
restoring it w/o mb breaks them.

Use set_current_state() instead.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/kernel/freezer.c b/kernel/freezer.c
index 7be56c5..3f46010 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -58,7 +58,13 @@ void refrigerator(void)
 	current->flags &= ~PF_FREEZING;
 
 	pr_debug("%s left refrigerator\n", current->comm);
-	__set_current_state(save);
+
+	/*
+	 * Restore saved task state before returning.  The mb'd version
+	 * needs to be used; otherwise, it might silently break
+	 * synchronization which depends on ordered task state change.
+	 */
+	set_current_state(save);
 }
 EXPORT_SYMBOL(refrigerator);
 
-- 
cgit v0.10.2


From 3a7cbd50f74907580eb47a8d08e1f29741b81abf Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:22 -0800
Subject: freezer: don't unnecessarily set PF_NOFREEZE explicitly

Some drivers set PF_NOFREEZE in their kthread functions which is
completely unnecessary and racy - some part of freezer code doesn't
consider cases where PF_NOFREEZE is set asynchronous to freezer
operations.

In general, there's no reason to allow setting PF_NOFREEZE explicitly.
Remove them and change the documentation to note that setting
PF_NOFREEZE directly isn't allowed.

-v2: Dropped change to twl4030-irq.c as it no longer uses PF_NOFREEZE.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: "Gustavo F. Padovan" <padovan@profusion.mobi>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: wwang <wei_wang@realsil.com.cn>

diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index 316c2ba..587e082 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -67,7 +67,7 @@ III. Which kernel threads are freezable?
 
 Kernel threads are not freezable by default.  However, a kernel thread may clear
 PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
-directly is strongly discouraged).  From this point it is regarded as freezable
+directly is not allowed).  From this point it is regarded as freezable
 and must call try_to_freeze() in a suitable place.
 
 IV. Why do we do that?
diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c
index a88a78c..6c3defa 100644
--- a/drivers/bluetooth/btmrvl_main.c
+++ b/drivers/bluetooth/btmrvl_main.c
@@ -475,8 +475,6 @@ static int btmrvl_service_main_thread(void *data)
 
 	init_waitqueue_entry(&wait, current);
 
-	current->flags |= PF_NOFREEZE;
-
 	for (;;) {
 		add_wait_queue(&thread->wait_q, &wait);
 
diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c
index 3eee45f..c6b456a 100644
--- a/drivers/mfd/twl6030-irq.c
+++ b/drivers/mfd/twl6030-irq.c
@@ -138,8 +138,6 @@ static int twl6030_irq_thread(void *data)
 	static const unsigned max_i2c_errors = 100;
 	int ret;
 
-	current->flags |= PF_NOFREEZE;
-
 	while (!kthread_should_stop()) {
 		int i;
 		union {
diff --git a/drivers/staging/rts_pstor/rtsx.c b/drivers/staging/rts_pstor/rtsx.c
index 480b0ed..8a7803c 100644
--- a/drivers/staging/rts_pstor/rtsx.c
+++ b/drivers/staging/rts_pstor/rtsx.c
@@ -466,8 +466,6 @@ static int rtsx_control_thread(void *__dev)
 	struct rtsx_chip *chip = dev->chip;
 	struct Scsi_Host *host = rtsx_to_host(dev);
 
-	current->flags |= PF_NOFREEZE;
-
 	for (;;) {
 		if (wait_for_completion_interruptible(&dev->cmnd_ready))
 			break;
-- 
cgit v0.10.2


From a0acae0e886d44bd5ce6d2f173c1ace0fcf0d9f6 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:22 -0800
Subject: freezer: unexport refrigerator() and update try_to_freeze() slightly

There is no reason to export two functions for entering the
refrigerator.  Calling refrigerator() instead of try_to_freeze()
doesn't save anything noticeable or removes any race condition.

* Rename refrigerator() to __refrigerator() and make it return bool
  indicating whether it scheduled out for freezing.

* Update try_to_freeze() to return bool and relay the return value of
  __refrigerator() if freezing().

* Convert all refrigerator() users to try_to_freeze().

* Update documentation accordingly.

* While at it, add might_sleep() to try_to_freeze().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Samuel Ortiz <samuel@sortiz.org>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.cz>
Cc: KONISHI Ryusuke <konishi.ryusuke@lab.ntt.co.jp>
Cc: Christoph Hellwig <hch@infradead.org>

diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index 587e082..3ab9fbd 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -21,7 +21,7 @@ freeze_processes() (defined in kernel/power/process.c) is called.  It executes
 try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and
 either wakes them up, if they are kernel threads, or sends fake signals to them,
 if they are user space processes.  A task that has TIF_FREEZE set, should react
-to it by calling the function called refrigerator() (defined in
+to it by calling the function called __refrigerator() (defined in
 kernel/freezer.c), which sets the task's PF_FROZEN flag, changes its state
 to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is cleared for it.
 Then, we say that the task is 'frozen' and therefore the set of functions
@@ -29,10 +29,10 @@ handling this mechanism is referred to as 'the freezer' (these functions are
 defined in kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h).
 User space processes are generally frozen before kernel threads.
 
-It is not recommended to call refrigerator() directly.  Instead, it is
-recommended to use the try_to_freeze() function (defined in
-include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the
-task enter refrigerator() if the flag is set.
+__refrigerator() must not be called directly.  Instead, use the
+try_to_freeze() function (defined in include/linux/freezer.h), that checks
+the task's TIF_FREEZE flag and makes the task enter __refrigerator() if the
+flag is set.
 
 For user space processes try_to_freeze() is called automatically from the
 signal-handling code, but the freezable kernel threads need to call it
@@ -61,7 +61,7 @@ wait_event_freezable() and wait_event_freezable_timeout() macros.
 After the system memory state has been restored from a hibernation image and
 devices have been reinitialized, the function thaw_processes() is called in
 order to clear the PF_FROZEN flag for each frozen task.  Then, the tasks that
-have been frozen leave refrigerator() and continue running.
+have been frozen leave __refrigerator() and continue running.
 
 III. Which kernel threads are freezable?
 
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index 41c96b3..e880c79 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -750,7 +750,7 @@ static int stir_transmit_thread(void *arg)
 
 			write_reg(stir, REG_CTRL1, CTRL1_TXPWD|CTRL1_RXPWD);
 
-			refrigerator();
+			try_to_freeze();
 
 			if (change_speed(stir, stir->speed))
 				break;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec1409..98ab240 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -340,7 +340,7 @@ again:
 		if (freezing(current)) {
 			worker->working = 0;
 			spin_unlock_irq(&worker->lock);
-			refrigerator();
+			try_to_freeze();
 		} else {
 			spin_unlock_irq(&worker->lock);
 			if (!kthread_should_stop()) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 62afe5c..622654f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1579,9 +1579,7 @@ static int cleaner_kthread(void *arg)
 			btrfs_run_defrag_inodes(root->fs_info);
 		}
 
-		if (freezing(current)) {
-			refrigerator();
-		} else {
+		if (!try_to_freeze()) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (!kthread_should_stop())
 				schedule();
@@ -1635,9 +1633,7 @@ sleep:
 		wake_up_process(root->fs_info->cleaner_kthread);
 		mutex_unlock(&root->fs_info->transaction_kthread_mutex);
 
-		if (freezing(current)) {
-			refrigerator();
-		} else {
+		if (!try_to_freeze()) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (!kthread_should_stop() &&
 			    !btrfs_transaction_blocked(root->fs_info))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9953d80..877350e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2882,8 +2882,7 @@ cont_thread:
 		}
 		mutex_unlock(&eli->li_list_mtx);
 
-		if (freezing(current))
-			refrigerator();
+		try_to_freeze();
 
 		cur = jiffies;
 		if ((time_after_eq(cur, next_wakeup)) ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 5986464..8154d42 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -951,8 +951,8 @@ int gfs2_logd(void *data)
 			wake_up(&sdp->sd_log_waitq);
 
 		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
-		if (freezing(current))
-			refrigerator();
+
+		try_to_freeze();
 
 		do {
 			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7e528dc..d49669e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1427,8 +1427,8 @@ int gfs2_quotad(void *data)
 		/* Check for & recover partially truncated inodes */
 		quotad_check_trunc_list(sdp);
 
-		if (freezing(current))
-			refrigerator();
+		try_to_freeze();
+
 		t = min(quotad_timeo, statfs_timeo);
 
 		prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index fea8dd6..a96cff0 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -166,7 +166,7 @@ loop:
 		 */
 		jbd_debug(1, "Now suspending kjournald\n");
 		spin_unlock(&journal->j_state_lock);
-		refrigerator();
+		try_to_freeze();
 		spin_lock(&journal->j_state_lock);
 	} else {
 		/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0fa0123..c0a5f9f 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -173,7 +173,7 @@ loop:
 		 */
 		jbd_debug(1, "Now suspending kjournald2\n");
 		write_unlock(&journal->j_state_lock);
-		refrigerator();
+		try_to_freeze();
 		write_lock(&journal->j_state_lock);
 	} else {
 		/*
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cc5f811..2eb952c 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2349,7 +2349,7 @@ int jfsIOWait(void *arg)
 
 		if (freezing(current)) {
 			spin_unlock_irq(&log_redrive_lock);
-			refrigerator();
+			try_to_freeze();
 		} else {
 			set_current_state(TASK_INTERRUPTIBLE);
 			spin_unlock_irq(&log_redrive_lock);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index af96060..bb8b661 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2800,7 +2800,7 @@ int jfs_lazycommit(void *arg)
 
 		if (freezing(current)) {
 			LAZY_UNLOCK(flags);
-			refrigerator();
+			try_to_freeze();
 		} else {
 			DECLARE_WAITQUEUE(wq, current);
 
@@ -2994,7 +2994,7 @@ int jfs_sync(void *arg)
 
 		if (freezing(current)) {
 			TXN_UNLOCK();
-			refrigerator();
+			try_to_freeze();
 		} else {
 			set_current_state(TASK_INTERRUPTIBLE);
 			TXN_UNLOCK();
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bb24ab6..0e72ad6 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2470,7 +2470,7 @@ static int nilfs_segctor_thread(void *arg)
 
 	if (freezing(current)) {
 		spin_unlock(&sci->sc_state_lock);
-		refrigerator();
+		try_to_freeze();
 		spin_lock(&sci->sc_state_lock);
 	} else {
 		DEFINE_WAIT(wait);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cf0ac05..0188299 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1703,7 +1703,7 @@ xfsbufd(
 
 		if (unlikely(freezing(current))) {
 			set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
-			refrigerator();
+			try_to_freeze();
 		} else {
 			clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
 		}
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index a5386e3..7a9427e 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -47,18 +47,17 @@ static inline bool should_send_signal(struct task_struct *p)
 /* Takes and releases task alloc lock using task_lock() */
 extern int thaw_process(struct task_struct *p);
 
-extern void refrigerator(void);
+extern bool __refrigerator(void);
 extern int freeze_processes(void);
 extern int freeze_kernel_threads(void);
 extern void thaw_processes(void);
 
-static inline int try_to_freeze(void)
+static inline bool try_to_freeze(void)
 {
-	if (freezing(current)) {
-		refrigerator();
-		return 1;
-	} else
-		return 0;
+	might_sleep();
+	if (likely(!freezing(current)))
+		return false;
+	return __refrigerator();
 }
 
 extern bool freeze_task(struct task_struct *p, bool sig_only);
@@ -181,12 +180,12 @@ static inline void set_freeze_flag(struct task_struct *p) {}
 static inline void clear_freeze_flag(struct task_struct *p) {}
 static inline int thaw_process(struct task_struct *p) { return 1; }
 
-static inline void refrigerator(void) {}
+static inline bool __refrigerator(void) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
 static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
 
-static inline int try_to_freeze(void) { return 0; }
+static inline bool try_to_freeze(void) { return false; }
 
 static inline void freezer_do_not_count(void) {}
 static inline void freezer_count(void) {}
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 3f46010..732f14f 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -23,10 +23,11 @@ static inline void frozen_process(void)
 }
 
 /* Refrigerator is place where frozen processes are stored :-). */
-void refrigerator(void)
+bool __refrigerator(void)
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
+	bool was_frozen = false;
 	long save;
 
 	task_lock(current);
@@ -35,7 +36,7 @@ void refrigerator(void)
 		task_unlock(current);
 	} else {
 		task_unlock(current);
-		return;
+		return was_frozen;
 	}
 	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
@@ -51,6 +52,7 @@ void refrigerator(void)
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!frozen(current))
 			break;
+		was_frozen = true;
 		schedule();
 	}
 
@@ -65,8 +67,10 @@ void refrigerator(void)
 	 * synchronization which depends on ordered task state change.
 	 */
 	set_current_state(save);
+
+	return was_frozen;
 }
-EXPORT_SYMBOL(refrigerator);
+EXPORT_SYMBOL(__refrigerator);
 
 static void fake_signal_wake_up(struct task_struct *p)
 {
-- 
cgit v0.10.2


From 8a32c441c1609f80e55df75422324a1151208f40 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: implement and use kthread_freezable_should_stop()

Writeback and thinkpad_acpi have been using thaw_process() to prevent
deadlock between the freezer and kthread_stop(); unfortunately, this
is inherently racy - nothing prevents freezing from happening between
thaw_process() and kthread_stop().

This patch implements kthread_freezable_should_stop() which enters
refrigerator if necessary but is guaranteed to return if
kthread_stop() is invoked.  Both thaw_process() users are converted to
use the new function.

Note that this deadlock condition exists for many of freezable
kthreads.  They need to be converted to use the new should_stop or
freezable workqueue.

Tested with synthetic test case.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Henrique de Moraes Holschuh <ibm-acpi@hmh.eng.br>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 7b82868..4b11fc9 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2456,8 +2456,9 @@ static int hotkey_kthread(void *data)
 	u32 poll_mask, event_mask;
 	unsigned int si, so;
 	unsigned long t;
-	unsigned int change_detector, must_reset;
+	unsigned int change_detector;
 	unsigned int poll_freq;
+	bool was_frozen;
 
 	mutex_lock(&hotkey_thread_mutex);
 
@@ -2488,14 +2489,14 @@ static int hotkey_kthread(void *data)
 				t = 100;	/* should never happen... */
 		}
 		t = msleep_interruptible(t);
-		if (unlikely(kthread_should_stop()))
+		if (unlikely(kthread_freezable_should_stop(&was_frozen)))
 			break;
-		must_reset = try_to_freeze();
-		if (t > 0 && !must_reset)
+
+		if (t > 0 && !was_frozen)
 			continue;
 
 		mutex_lock(&hotkey_thread_data_mutex);
-		if (must_reset || hotkey_config_change != change_detector) {
+		if (was_frozen || hotkey_config_change != change_detector) {
 			/* forget old state on thaw or config change */
 			si = so;
 			t = 0;
@@ -2528,10 +2529,6 @@ exit:
 static void hotkey_poll_stop_sync(void)
 {
 	if (tpacpi_hotkey_task) {
-		if (frozen(tpacpi_hotkey_task) ||
-		    freezing(tpacpi_hotkey_task))
-			thaw_process(tpacpi_hotkey_task);
-
 		kthread_stop(tpacpi_hotkey_task);
 		tpacpi_hotkey_task = NULL;
 		mutex_lock(&hotkey_thread_mutex);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73c3992..271fde5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -947,7 +947,7 @@ int bdi_writeback_thread(void *data)
 
 	trace_writeback_thread_start(bdi);
 
-	while (!kthread_should_stop()) {
+	while (!kthread_freezable_should_stop(NULL)) {
 		/*
 		 * Remove own delayed wake-up timer, since we are already awake
 		 * and we'll take care of the preriodic write-back.
@@ -977,8 +977,6 @@ int bdi_writeback_thread(void *data)
 			 */
 			schedule();
 		}
-
-		try_to_freeze();
 	}
 
 	/* Flush any work that raced with us exiting */
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 7a9427e..d02b784 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -47,7 +47,7 @@ static inline bool should_send_signal(struct task_struct *p)
 /* Takes and releases task alloc lock using task_lock() */
 extern int thaw_process(struct task_struct *p);
 
-extern bool __refrigerator(void);
+extern bool __refrigerator(bool check_kthr_stop);
 extern int freeze_processes(void);
 extern int freeze_kernel_threads(void);
 extern void thaw_processes(void);
@@ -57,7 +57,7 @@ static inline bool try_to_freeze(void)
 	might_sleep();
 	if (likely(!freezing(current)))
 		return false;
-	return __refrigerator();
+	return __refrigerator(false);
 }
 
 extern bool freeze_task(struct task_struct *p, bool sig_only);
@@ -180,7 +180,7 @@ static inline void set_freeze_flag(struct task_struct *p) {}
 static inline void clear_freeze_flag(struct task_struct *p) {}
 static inline int thaw_process(struct task_struct *p) { return 1; }
 
-static inline bool __refrigerator(void) { return false; }
+static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
 static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 5cac19b..0714b24 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -35,6 +35,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 void kthread_bind(struct task_struct *k, unsigned int cpu);
 int kthread_stop(struct task_struct *k);
 int kthread_should_stop(void);
+bool kthread_freezable_should_stop(bool *was_frozen);
 void *kthread_data(struct task_struct *k);
 
 int kthreadd(void *unused);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 732f14f..b83c30e 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -9,6 +9,7 @@
 #include <linux/export.h>
 #include <linux/syscalls.h>
 #include <linux/freezer.h>
+#include <linux/kthread.h>
 
 /*
  * freezing is complete, mark current process as frozen
@@ -23,7 +24,7 @@ static inline void frozen_process(void)
 }
 
 /* Refrigerator is place where frozen processes are stored :-). */
-bool __refrigerator(void)
+bool __refrigerator(bool check_kthr_stop)
 {
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
@@ -50,7 +51,8 @@ bool __refrigerator(void)
 
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!frozen(current))
+		if (!frozen(current) ||
+		    (check_kthr_stop && kthread_should_stop()))
 			break;
 		was_frozen = true;
 		schedule();
diff --git a/kernel/kthread.c b/kernel/kthread.c
index b6d216a..1c36dea 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -59,6 +59,31 @@ int kthread_should_stop(void)
 EXPORT_SYMBOL(kthread_should_stop);
 
 /**
+ * kthread_freezable_should_stop - should this freezable kthread return now?
+ * @was_frozen: optional out parameter, indicates whether %current was frozen
+ *
+ * kthread_should_stop() for freezable kthreads, which will enter
+ * refrigerator if necessary.  This function is safe from kthread_stop() /
+ * freezer deadlock and freezable kthreads should use this function instead
+ * of calling try_to_freeze() directly.
+ */
+bool kthread_freezable_should_stop(bool *was_frozen)
+{
+	bool frozen = false;
+
+	might_sleep();
+
+	if (unlikely(freezing(current)))
+		frozen = __refrigerator(true);
+
+	if (was_frozen)
+		*was_frozen = frozen;
+
+	return kthread_should_stop();
+}
+EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
+
+/**
  * kthread_data - return data value specified on kthread creation
  * @task: kthread task in question
  *
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 71034f4..7ba8fea 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -600,14 +600,10 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 
 	/*
 	 * Finally, kill the kernel thread. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility. Force
-	 * unfreeze of the thread before calling kthread_stop(), otherwise
-	 * it would never exet if it is currently stuck in the refrigerator.
+	 * safe anymore, since the bdi is gone from visibility.
 	 */
-	if (bdi->wb.task) {
-		thaw_process(bdi->wb.task);
+	if (bdi->wb.task)
 		kthread_stop(bdi->wb.task);
-	}
 }
 
 /*
-- 
cgit v0.10.2


From a5be2d0d1a8746e7be5210e3d6b904455000443c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: rename thaw_process() to __thaw_task() and simplify the
 implementation

thaw_process() now has only internal users - system and cgroup
freezers.  Remove the unnecessary return value, rename, unexport and
collapse __thaw_process() into it.  This will help further updates to
the freezer code.

-v3: oom_kill grew a use of thaw_process() while this patch was
     pending.  Convert it to use __thaw_task() for now.  In the longer
     term, this should be handled by allowing tasks to die if killed
     even if it's frozen.

-v2: minor style update as suggested by Matt.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Paul Menage <menage@google.com>
Cc: Matt Helsley <matthltc@us.ibm.com>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index d02b784..ba4f512 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -45,7 +45,7 @@ static inline bool should_send_signal(struct task_struct *p)
 }
 
 /* Takes and releases task alloc lock using task_lock() */
-extern int thaw_process(struct task_struct *p);
+extern void __thaw_task(struct task_struct *t);
 
 extern bool __refrigerator(bool check_kthr_stop);
 extern int freeze_processes(void);
@@ -178,7 +178,6 @@ static inline int frozen(struct task_struct *p) { return 0; }
 static inline int freezing(struct task_struct *p) { return 0; }
 static inline void set_freeze_flag(struct task_struct *p) {}
 static inline void clear_freeze_flag(struct task_struct *p) {}
-static inline int thaw_process(struct task_struct *p) { return 1; }
 
 static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 5e828a2..a6d405a 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -130,7 +130,7 @@ struct cgroup_subsys freezer_subsys;
  *   write_lock css_set_lock (cgroup iterator start)
  *    task->alloc_lock
  *   read_lock css_set_lock (cgroup iterator start)
- *    task->alloc_lock (inside thaw_process(), prevents race with refrigerator())
+ *    task->alloc_lock (inside __thaw_task(), prevents race with refrigerator())
  *     sighand->siglock
  */
 static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
@@ -300,9 +300,8 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	struct task_struct *task;
 
 	cgroup_iter_start(cgroup, &it);
-	while ((task = cgroup_iter_next(cgroup, &it))) {
-		thaw_process(task);
-	}
+	while ((task = cgroup_iter_next(cgroup, &it)))
+		__thaw_task(task);
 	cgroup_iter_end(cgroup, &it);
 
 	freezer->state = CGROUP_THAWED;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index b83c30e..c851d58 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -145,18 +145,8 @@ void cancel_freezing(struct task_struct *p)
 	}
 }
 
-static int __thaw_process(struct task_struct *p)
-{
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
-		return 1;
-	}
-	clear_freeze_flag(p);
-	return 0;
-}
-
 /*
- * Wake up a frozen process
+ * Wake up a frozen task
  *
  * task_lock() is needed to prevent the race with refrigerator() which may
  * occur if the freezing of tasks fails.  Namely, without the lock, if the
@@ -164,15 +154,18 @@ static int __thaw_process(struct task_struct *p)
  * refrigerator() could call frozen_process(), in which case the task would be
  * frozen and no one would thaw it.
  */
-int thaw_process(struct task_struct *p)
+void __thaw_task(struct task_struct *p)
 {
+	bool was_frozen;
+
 	task_lock(p);
-	if (__thaw_process(p) == 1) {
-		task_unlock(p);
-		wake_up_process(p);
-		return 1;
-	}
+	was_frozen = frozen(p);
+	if (was_frozen)
+		p->flags &= ~PF_FROZEN;
+	else
+		clear_freeze_flag(p);
 	task_unlock(p);
-	return 0;
+
+	if (was_frozen)
+		wake_up_process(p);
 }
-EXPORT_SYMBOL(thaw_process);
diff --git a/kernel/power/process.c b/kernel/power/process.c
index addbbe5..fe27872 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -186,7 +186,7 @@ static void thaw_tasks(bool nosig_only)
 		if (cgroup_freezing_or_frozen(p))
 			continue;
 
-		thaw_process(p);
+		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 76f2c5a..3134ee2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -328,7 +328,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		 */
 		if (test_tsk_thread_flag(p, TIF_MEMDIE)) {
 			if (unlikely(frozen(p)))
-				thaw_process(p);
+				__thaw_task(p);
 			return ERR_PTR(-1UL);
 		}
 		if (!p->mm)
-- 
cgit v0.10.2


From a585042f7b933539a0b6bc63650c2d49ffb2e55d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: remove racy clear_freeze_flag() and set PF_NOFREEZE on dead
 tasks

clear_freeze_flag() in exit_mm() is racy.  Freezing can start
afterwards.  Remove it.  Skipping freezer for exiting task will be
properly implemented later.

Also, freezable() was testing exit_state directly to make system
freezer ignore dead tasks.  Let the exiting task set PF_NOFREEZE after
entering TASK_DEAD instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>

diff --git a/kernel/exit.c b/kernel/exit.c
index d0b7d98..95a4141 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -679,8 +679,6 @@ static void exit_mm(struct task_struct * tsk)
 	tsk->mm = NULL;
 	up_read(&mm->mmap_sem);
 	enter_lazy_tlb(mm, current);
-	/* We don't want this task to be frozen prematurely */
-	clear_freeze_flag(tsk);
 	task_unlock(tsk);
 	mm_update_next_owner(mm);
 	mmput(mm);
@@ -1040,6 +1038,7 @@ NORET_TYPE void do_exit(long code)
 	exit_rcu();
 	/* causes final put_task_struct in finish_task_switch(). */
 	tsk->state = TASK_DEAD;
+	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
 	schedule();
 	BUG();
 	/* Avoid "noreturn function does return".  */
diff --git a/kernel/power/process.c b/kernel/power/process.c
index fe27872..23822dc 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -25,8 +25,7 @@
 static inline int freezable(struct task_struct * p)
 {
 	if ((p == current) ||
-	    (p->flags & PF_NOFREEZE) ||
-	    (p->exit_state != 0))
+	    (p->flags & PF_NOFREEZE))
 		return 0;
 	return 1;
 }
-- 
cgit v0.10.2


From 6cd8dedcdd8e8de01391a7cf25f0b2afeb24f8f4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:23 -0800
Subject: freezer: don't distinguish nosig tasks on thaw

There's no point in thawing nosig tasks before others.  There's no
ordering requirement between the two groups on thaw, which the staged
thawing can't guarantee anyway.  Simplify thaw_processes() by removing
the distinction and collapsing thaw_tasks() into thaw_processes().
This will help further updates to freezer.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 23822dc..9db048f 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -170,34 +170,28 @@ int freeze_kernel_threads(void)
 	return error;
 }
 
-static void thaw_tasks(bool nosig_only)
+void thaw_processes(void)
 {
 	struct task_struct *g, *p;
 
+	oom_killer_enable();
+
+	printk("Restarting tasks ... ");
+
+	thaw_workqueues();
+
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
 		if (!freezable(p))
 			continue;
 
-		if (nosig_only && should_send_signal(p))
-			continue;
-
 		if (cgroup_freezing_or_frozen(p))
 			continue;
 
 		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
-}
-
-void thaw_processes(void)
-{
-	oom_killer_enable();
 
-	printk("Restarting tasks ... ");
-	thaw_workqueues();
-	thaw_tasks(true);
-	thaw_tasks(false);
 	schedule();
 	printk("done.\n");
 }
-- 
cgit v0.10.2


From 0c9af09262864a2744091ee94c98c4a8fd60c98b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: use dedicated lock instead of task_lock() + memory barrier

Freezer synchronization is needlessly complicated - it's by no means a
hot path and the priority is staying unintrusive and safe.  This patch
makes it simply use a dedicated lock instead of piggy-backing on
task_lock() and playing with memory barriers.

On the failure path of try_to_freeze_tasks(), locking is moved from it
to cancel_freezing().  This makes the frozen() test racy but the race
here is a non-issue as the warning is printed for tasks which failed
to enter frozen for 20 seconds and race on PF_FROZEN at the last
moment doesn't change anything.

This simplifies freezer implementation and eases further changes
including some race fixes.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/kernel/freezer.c b/kernel/freezer.c
index c851d58..4130e48 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -11,17 +11,8 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 
-/*
- * freezing is complete, mark current process as frozen
- */
-static inline void frozen_process(void)
-{
-	if (!unlikely(current->flags & PF_NOFREEZE)) {
-		current->flags |= PF_FROZEN;
-		smp_wmb();
-	}
-	clear_freeze_flag(current);
-}
+/* protects freezing and frozen transitions */
+static DEFINE_SPINLOCK(freezer_lock);
 
 /* Refrigerator is place where frozen processes are stored :-). */
 bool __refrigerator(bool check_kthr_stop)
@@ -31,14 +22,16 @@ bool __refrigerator(bool check_kthr_stop)
 	bool was_frozen = false;
 	long save;
 
-	task_lock(current);
-	if (freezing(current)) {
-		frozen_process();
-		task_unlock(current);
-	} else {
-		task_unlock(current);
+	spin_lock_irq(&freezer_lock);
+	if (!freezing(current)) {
+		spin_unlock_irq(&freezer_lock);
 		return was_frozen;
 	}
+	if (!(current->flags & PF_NOFREEZE))
+		current->flags |= PF_FROZEN;
+	clear_freeze_flag(current);
+	spin_unlock_irq(&freezer_lock);
+
 	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
 
@@ -99,21 +92,18 @@ static void fake_signal_wake_up(struct task_struct *p)
  */
 bool freeze_task(struct task_struct *p, bool sig_only)
 {
-	/*
-	 * We first check if the task is freezing and next if it has already
-	 * been frozen to avoid the race with frozen_process() which first marks
-	 * the task as frozen and next clears its TIF_FREEZE.
-	 */
-	if (!freezing(p)) {
-		smp_rmb();
-		if (frozen(p))
-			return false;
-
-		if (!sig_only || should_send_signal(p))
-			set_freeze_flag(p);
-		else
-			return false;
-	}
+	unsigned long flags;
+	bool ret = false;
+
+	spin_lock_irqsave(&freezer_lock, flags);
+
+	if (sig_only && !should_send_signal(p))
+		goto out_unlock;
+
+	if (frozen(p))
+		goto out_unlock;
+
+	set_freeze_flag(p);
 
 	if (should_send_signal(p)) {
 		fake_signal_wake_up(p);
@@ -123,26 +113,28 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 		 * TASK_RUNNING transition can't race with task state
 		 * testing in try_to_freeze_tasks().
 		 */
-	} else if (sig_only) {
-		return false;
 	} else {
 		wake_up_state(p, TASK_INTERRUPTIBLE);
 	}
-
-	return true;
+	ret = true;
+out_unlock:
+	spin_unlock_irqrestore(&freezer_lock, flags);
+	return ret;
 }
 
 void cancel_freezing(struct task_struct *p)
 {
 	unsigned long flags;
 
+	spin_lock_irqsave(&freezer_lock, flags);
 	if (freezing(p)) {
 		pr_debug("  clean up: %s\n", p->comm);
 		clear_freeze_flag(p);
-		spin_lock_irqsave(&p->sighand->siglock, flags);
+		spin_lock(&p->sighand->siglock);
 		recalc_sigpending_and_wake(p);
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+		spin_unlock(&p->sighand->siglock);
 	}
+	spin_unlock_irqrestore(&freezer_lock, flags);
 }
 
 /*
@@ -156,16 +148,14 @@ void cancel_freezing(struct task_struct *p)
  */
 void __thaw_task(struct task_struct *p)
 {
-	bool was_frozen;
+	unsigned long flags;
 
-	task_lock(p);
-	was_frozen = frozen(p);
-	if (was_frozen)
+	spin_lock_irqsave(&freezer_lock, flags);
+	if (frozen(p)) {
 		p->flags &= ~PF_FROZEN;
-	else
-		clear_freeze_flag(p);
-	task_unlock(p);
-
-	if (was_frozen)
 		wake_up_process(p);
+	} else {
+		clear_freeze_flag(p);
+	}
+	spin_unlock_irqrestore(&freezer_lock, flags);
 }
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 9db048f..bd420ca 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -118,11 +118,9 @@ static int try_to_freeze_tasks(bool sig_only)
 
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			task_lock(p);
 			if (!wakeup && freezing(p) && !freezer_should_skip(p))
 				sched_show_task(p);
 			cancel_freezing(p);
-			task_unlock(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	} else {
-- 
cgit v0.10.2


From 6907483b4e803a20f0b48cc9afa3817420ce61c5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: make freezing indicate freeze condition in effect

Currently freezing (TIF_FREEZE) and frozen (PF_FROZEN) states are
interlocked - freezing is set to request freeze and when the task
actually freezes, it clears freezing and sets frozen.

This interlocking makes things more complex than necessary - freezing
doesn't mean there's freezing condition in effect and frozen doesn't
match the task actually entering and leaving frozen state (it's
cleared by the thawing task).

This patch makes freezing indicate that freeze condition is in effect.
A task enters and stays frozen if freezing.  This makes PF_FROZEN
manipulation done only by the task itself and prevents wakeup from
__thaw_task() leaking outside of refrigerator.

The only place which needs to tell freezing && !frozen is
try_to_freeze_task() to whine about tasks which don't enter frozen.
It's updated to test the condition explicitly.

With the change, frozen() state my linger after __thaw_task() until
the task wakes up and exits fridge.  This can trigger BUG_ON() in
update_if_frozen().  Work it around by testing freezing() && frozen()
instead of frozen().

-v2: Oleg pointed out missing re-check of freezing() when trying to
     clear FROZEN and possible spurious BUG_ON() trigger in
     update_if_frozen().  Both fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul Menage <paul@paulmenage.org>

diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index a6d405a..cd27b08 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -231,7 +231,7 @@ static void update_if_frozen(struct cgroup *cgroup,
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
 		ntotal++;
-		if (frozen(task))
+		if (freezing(task) && frozen(task))
 			nfrozen++;
 	}
 
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 4130e48..a8822be 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -22,14 +22,19 @@ bool __refrigerator(bool check_kthr_stop)
 	bool was_frozen = false;
 	long save;
 
+	/*
+	 * Enter FROZEN.  If NOFREEZE, schedule immediate thawing by
+	 * clearing freezing.
+	 */
 	spin_lock_irq(&freezer_lock);
+repeat:
 	if (!freezing(current)) {
 		spin_unlock_irq(&freezer_lock);
 		return was_frozen;
 	}
-	if (!(current->flags & PF_NOFREEZE))
-		current->flags |= PF_FROZEN;
-	clear_freeze_flag(current);
+	if (current->flags & PF_NOFREEZE)
+		clear_freeze_flag(current);
+	current->flags |= PF_FROZEN;
 	spin_unlock_irq(&freezer_lock);
 
 	save = current->state;
@@ -44,7 +49,7 @@ bool __refrigerator(bool check_kthr_stop)
 
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (!frozen(current) ||
+		if (!freezing(current) ||
 		    (check_kthr_stop && kthread_should_stop()))
 			break;
 		was_frozen = true;
@@ -54,6 +59,13 @@ bool __refrigerator(bool check_kthr_stop)
 	/* Remove the accounting blocker */
 	current->flags &= ~PF_FREEZING;
 
+	/* leave FROZEN */
+	spin_lock_irq(&freezer_lock);
+	if (freezing(current))
+		goto repeat;
+	current->flags &= ~PF_FROZEN;
+	spin_unlock_irq(&freezer_lock);
+
 	pr_debug("%s left refrigerator\n", current->comm);
 
 	/*
@@ -137,25 +149,19 @@ void cancel_freezing(struct task_struct *p)
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
 
-/*
- * Wake up a frozen task
- *
- * task_lock() is needed to prevent the race with refrigerator() which may
- * occur if the freezing of tasks fails.  Namely, without the lock, if the
- * freezing of tasks failed, thaw_tasks() might have run before a task in
- * refrigerator() could call frozen_process(), in which case the task would be
- * frozen and no one would thaw it.
- */
 void __thaw_task(struct task_struct *p)
 {
 	unsigned long flags;
 
+	/*
+	 * Clear freezing and kick @p if FROZEN.  Clearing is guaranteed to
+	 * be visible to @p as waking up implies wmb.  Waking up inside
+	 * freezer_lock also prevents wakeups from leaking outside
+	 * refrigerator.
+	 */
 	spin_lock_irqsave(&freezer_lock, flags);
-	if (frozen(p)) {
-		p->flags &= ~PF_FROZEN;
+	clear_freeze_flag(p);
+	if (frozen(p))
 		wake_up_process(p);
-	} else {
-		clear_freeze_flag(p);
-	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
diff --git a/kernel/power/process.c b/kernel/power/process.c
index bd420ca..e6e2739 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -118,7 +118,8 @@ static int try_to_freeze_tasks(bool sig_only)
 
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (!wakeup && freezing(p) && !freezer_should_skip(p))
+			if (!wakeup && !freezer_should_skip(p) &&
+			    freezing(p) && !frozen(p))
 				sched_show_task(p);
 			cancel_freezing(p);
 		} while_each_thread(g, p);
-- 
cgit v0.10.2


From 85f1d476653f52c97ca75466b2494e67c1cbd25d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: test freezable conditions while holding freezer_lock

try_to_freeze_tasks() and thaw_processes() use freezable() and
frozen() as preliminary tests before initiating operations on a task.
These are done without any synchronization and hinder with
synchronization cleanup without any real performance benefits.

In try_to_freeze_tasks(), open code self test and move PF_NOFREEZE and
frozen() tests inside freezer_lock in freeze_task().

thaw_processes() can simply drop freezable() test as frozen() test in
__thaw_task() is enough.

Note: This used to be a part of larger patch to fix set_freezable()
      race.  Separated out to satisfy ordering among dependent fixes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>

diff --git a/kernel/freezer.c b/kernel/freezer.c
index a8822be..a257ecd 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -109,7 +109,8 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 
 	spin_lock_irqsave(&freezer_lock, flags);
 
-	if (sig_only && !should_send_signal(p))
+	if ((p->flags & PF_NOFREEZE) ||
+	    (sig_only && !should_send_signal(p)))
 		goto out_unlock;
 
 	if (frozen(p))
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e6e2739..e59676f 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,14 +22,6 @@
  */
 #define TIMEOUT	(20 * HZ)
 
-static inline int freezable(struct task_struct * p)
-{
-	if ((p == current) ||
-	    (p->flags & PF_NOFREEZE))
-		return 0;
-	return 1;
-}
-
 static int try_to_freeze_tasks(bool sig_only)
 {
 	struct task_struct *g, *p;
@@ -52,10 +44,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (frozen(p) || !freezable(p))
-				continue;
-
-			if (!freeze_task(p, sig_only))
+			if (p == current || !freeze_task(p, sig_only))
 				continue;
 
 			/*
@@ -181,9 +170,6 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (!freezable(p))
-			continue;
-
 		if (cgroup_freezing_or_frozen(p))
 			continue;
 
-- 
cgit v0.10.2


From 376fede80e74d98b49d1ba9ac18f23c9fd026ddd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: kill PF_FREEZING

With the previous changes, there's no meaningful difference between
PF_FREEZING and PF_FROZEN.  Remove PF_FREEZING and use PF_FROZEN
instead in task_contributes_to_load().

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68daf4f..d12bd03 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -220,7 +220,7 @@ extern char ___assert_task_state[1 - 2*!!(
 			((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
 #define task_contributes_to_load(task)	\
 				((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-				 (task->flags & PF_FREEZING) == 0)
+				 (task->flags & PF_FROZEN) == 0)
 
 #define __set_task_state(tsk, state_value)		\
 	do { (tsk)->state = (state_value); } while (0)
@@ -1773,7 +1773,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_MEMALLOC	0x00000800	/* Allocating memory */
 #define PF_NPROC_EXCEEDED 0x00001000	/* set_user noticed that RLIMIT_NPROC was exceeded */
 #define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
-#define PF_FREEZING	0x00004000	/* freeze in progress. do not account to load */
 #define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
 #define PF_FROZEN	0x00010000	/* frozen for system suspend */
 #define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
diff --git a/kernel/freezer.c b/kernel/freezer.c
index a257ecd..b8b5621 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -44,9 +44,6 @@ repeat:
 	recalc_sigpending(); /* We sent fake signal, clean it up */
 	spin_unlock_irq(&current->sighand->siglock);
 
-	/* prevent accounting of that task to load */
-	current->flags |= PF_FREEZING;
-
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!freezing(current) ||
@@ -56,9 +53,6 @@ repeat:
 		schedule();
 	}
 
-	/* Remove the accounting blocker */
-	current->flags &= ~PF_FREEZING;
-
 	/* leave FROZEN */
 	spin_lock_irq(&freezer_lock);
 	if (freezing(current))
-- 
cgit v0.10.2


From 03afed8bc296fa70186ba832c1126228bb992465 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:24 -0800
Subject: freezer: clean up freeze_processes() failure path

freeze_processes() failure path is rather messy.  Freezing is canceled
for workqueues and tasks which aren't frozen yet but frozen tasks are
left alone and should be thawed by the caller and of course some
callers (xen and kexec) didn't do it.

This patch updates __thaw_task() to handle cancelation correctly and
makes freeze_processes() and freeze_kernel_threads() call
thaw_processes() on failure instead so that the system is fully thawed
on failure.  Unnecessary [suspend_]thaw_processes() calls are removed
from kernel/power/hibernate.c, suspend.c and user.c.

While at it, restructure error checking if clause in suspend_prepare()
to be less weird.

-v2: Srivatsa spotted missing removal of suspend_thaw_processes() in
     suspend_prepare() and error in commit message.  Updated.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index ba4f512..93f411a 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -61,7 +61,6 @@ static inline bool try_to_freeze(void)
 }
 
 extern bool freeze_task(struct task_struct *p, bool sig_only);
-extern void cancel_freezing(struct task_struct *p);
 
 #ifdef CONFIG_CGROUP_FREEZER
 extern int cgroup_freezing_or_frozen(struct task_struct *task);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index b8b5621..11e32d4 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -129,21 +129,6 @@ out_unlock:
 	return ret;
 }
 
-void cancel_freezing(struct task_struct *p)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&freezer_lock, flags);
-	if (freezing(p)) {
-		pr_debug("  clean up: %s\n", p->comm);
-		clear_freeze_flag(p);
-		spin_lock(&p->sighand->siglock);
-		recalc_sigpending_and_wake(p);
-		spin_unlock(&p->sighand->siglock);
-	}
-	spin_unlock_irqrestore(&freezer_lock, flags);
-}
-
 void __thaw_task(struct task_struct *p)
 {
 	unsigned long flags;
@@ -153,10 +138,18 @@ void __thaw_task(struct task_struct *p)
 	 * be visible to @p as waking up implies wmb.  Waking up inside
 	 * freezer_lock also prevents wakeups from leaking outside
 	 * refrigerator.
+	 *
+	 * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to
+	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
 	clear_freeze_flag(p);
-	if (frozen(p))
+	if (frozen(p)) {
 		wake_up_process(p);
+	} else {
+		spin_lock(&p->sighand->siglock);
+		recalc_sigpending_and_wake(p);
+		spin_unlock(&p->sighand->siglock);
+	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 196c0126..ba2319f 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -607,17 +607,6 @@ static void power_down(void)
 	while(1);
 }
 
-static int prepare_processes(void)
-{
-	int error = 0;
-
-	if (freeze_processes()) {
-		error = -EBUSY;
-		thaw_processes();
-	}
-	return error;
-}
-
 /**
  * hibernate - Carry out system hibernation, including saving the image.
  */
@@ -650,7 +639,7 @@ int hibernate(void)
 	sys_sync();
 	printk("done.\n");
 
-	error = prepare_processes();
+	error = freeze_processes();
 	if (error)
 		goto Finish;
 
@@ -811,7 +800,7 @@ static int software_resume(void)
 		goto close_finish;
 
 	pr_debug("PM: Preparing processes for restore.\n");
-	error = prepare_processes();
+	error = freeze_processes();
 	if (error) {
 		swsusp_close(FMODE_READ);
 		goto Done;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index e59676f..ce64383 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -91,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only)
 	elapsed_csecs = elapsed_csecs64;
 
 	if (todo) {
-		/* This does not unfreeze processes that are already frozen
-		 * (we have slightly ugly calling convention in that respect,
-		 * and caller must call thaw_processes() if something fails),
-		 * but it cleans up leftover PF_FREEZE requests.
-		 */
 		printk("\n");
 		printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
 		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
@@ -103,14 +98,11 @@ static int try_to_freeze_tasks(bool sig_only)
 		       elapsed_csecs / 100, elapsed_csecs % 100,
 		       todo - wq_busy, wq_busy);
 
-		thaw_workqueues();
-
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!wakeup && !freezer_should_skip(p) &&
 			    freezing(p) && !frozen(p))
 				sched_show_task(p);
-			cancel_freezing(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 	} else {
@@ -123,6 +115,8 @@ static int try_to_freeze_tasks(bool sig_only)
 
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
+ *
+ * On success, returns 0.  On failure, -errno and system is fully thawed.
  */
 int freeze_processes(void)
 {
@@ -137,11 +131,15 @@ int freeze_processes(void)
 	printk("\n");
 	BUG_ON(in_atomic());
 
+	if (error)
+		thaw_processes();
 	return error;
 }
 
 /**
  * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator.
+ *
+ * On success, returns 0.  On failure, -errno and system is fully thawed.
  */
 int freeze_kernel_threads(void)
 {
@@ -155,6 +153,8 @@ int freeze_kernel_threads(void)
 	printk("\n");
 	BUG_ON(in_atomic());
 
+	if (error)
+		thaw_processes();
 	return error;
 }
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4953dc0..d336b27 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -106,13 +106,11 @@ static int suspend_prepare(void)
 		goto Finish;
 
 	error = suspend_freeze_processes();
-	if (error) {
-		suspend_stats.failed_freeze++;
-		dpm_save_failed_step(SUSPEND_FREEZE);
-	} else
+	if (!error)
 		return 0;
 
-	suspend_thaw_processes();
+	suspend_stats.failed_freeze++;
+	dpm_save_failed_step(SUSPEND_FREEZE);
 	usermodehelper_enable();
  Finish:
 	pm_notifier_call_chain(PM_POST_SUSPEND);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6d8f535..7cc3f5b 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -257,10 +257,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			break;
 
 		error = freeze_processes();
-		if (error) {
-			thaw_processes();
+		if (error)
 			usermodehelper_enable();
-		}
 		if (!error)
 			data->frozen = 1;
 		break;
-- 
cgit v0.10.2


From 22b4e111fa01a1147aa562ceaf18a752a928ef4e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: cgroup_freezer: prepare for removal of TIF_FREEZE

TIF_FREEZE will be removed soon and freezing() will directly test
whether any freezing condition is in effect.  Make the following
changes in preparation.

* Rename cgroup_freezing_or_frozen() to cgroup_freezing() and make it
  return bool.

* Make cgroup_freezing() access task_freezer() under rcu read lock
  instead of task_lock().  This makes the state dereferencing racy
  against task moving to another cgroup; however, it was already racy
  without this change as ->state dereference wasn't synchronized.
  This will be later dealt with using attach hooks.

* freezer->state is now set before trying to push tasks into the
  target state.

-v2: Oleg pointed out that freeze_change_state() was setting
     freeze->state incorrectly to CGROUP_FROZEN instead of
     CGROUP_FREEZING.  Fixed.

-v3: Matt pointed out that setting CGROUP_FROZEN used to always invoke
     try_to_freeze_cgroup() regardless of the current state.  Patch
     updated such that the actual freeze/thaw operations are always
     performed on invocation.  This shouldn't make any difference
     unless something is broken.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Paul Menage <paul@paulmenage.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 93f411a..b2b4abc 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -63,11 +63,11 @@ static inline bool try_to_freeze(void)
 extern bool freeze_task(struct task_struct *p, bool sig_only);
 
 #ifdef CONFIG_CGROUP_FREEZER
-extern int cgroup_freezing_or_frozen(struct task_struct *task);
+extern bool cgroup_freezing(struct task_struct *task);
 #else /* !CONFIG_CGROUP_FREEZER */
-static inline int cgroup_freezing_or_frozen(struct task_struct *task)
+static inline bool cgroup_freezing(struct task_struct *task)
 {
-	return 0;
+	return false;
 }
 #endif /* !CONFIG_CGROUP_FREEZER */
 
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index cd27b08..e6a1b8d 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task)
 			    struct freezer, css);
 }
 
-static inline int __cgroup_freezing_or_frozen(struct task_struct *task)
+bool cgroup_freezing(struct task_struct *task)
 {
-	enum freezer_state state = task_freezer(task)->state;
-	return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
-}
+	enum freezer_state state;
+	bool ret;
 
-int cgroup_freezing_or_frozen(struct task_struct *task)
-{
-	int result;
-	task_lock(task);
-	result = __cgroup_freezing_or_frozen(task);
-	task_unlock(task);
-	return result;
+	rcu_read_lock();
+	state = task_freezer(task)->state;
+	ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN;
+	rcu_read_unlock();
+
+	return ret;
 }
 
 /*
@@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys;
  * freezer_can_attach():
  * cgroup_mutex (held by caller of can_attach)
  *
- * cgroup_freezing_or_frozen():
- * task->alloc_lock (to get task's cgroup)
- *
  * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
  * freezer->lock
  *  sighand->siglock (if the cgroup is freezing)
@@ -177,13 +172,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
 
 static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
-	rcu_read_lock();
-	if (__cgroup_freezing_or_frozen(tsk)) {
-		rcu_read_unlock();
-		return -EBUSY;
-	}
-	rcu_read_unlock();
-	return 0;
+	return cgroup_freezing(tsk) ? -EBUSY : 0;
 }
 
 static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -279,7 +268,6 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	struct task_struct *task;
 	unsigned int num_cant_freeze_now = 0;
 
-	freezer->state = CGROUP_FREEZING;
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
 		if (!freeze_task(task, true))
@@ -303,8 +291,6 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 	while ((task = cgroup_iter_next(cgroup, &it)))
 		__thaw_task(task);
 	cgroup_iter_end(cgroup, &it);
-
-	freezer->state = CGROUP_THAWED;
 }
 
 static int freezer_change_state(struct cgroup *cgroup,
@@ -318,20 +304,20 @@ static int freezer_change_state(struct cgroup *cgroup,
 	spin_lock_irq(&freezer->lock);
 
 	update_if_frozen(cgroup, freezer);
-	if (goal_state == freezer->state)
-		goto out;
 
 	switch (goal_state) {
 	case CGROUP_THAWED:
+		freezer->state = CGROUP_THAWED;
 		unfreeze_cgroup(cgroup, freezer);
 		break;
 	case CGROUP_FROZEN:
+		freezer->state = CGROUP_FREEZING;
 		retval = try_to_freeze_cgroup(cgroup, freezer);
 		break;
 	default:
 		BUG();
 	}
-out:
+
 	spin_unlock_irq(&freezer->lock);
 
 	return retval;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index ce64383..9f6f5c7 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -170,7 +170,7 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (cgroup_freezing_or_frozen(p))
+		if (cgroup_freezing(p))
 			continue;
 
 		__thaw_task(p);
-- 
cgit v0.10.2


From a3201227f803ad7fd43180c5195dbe5a2bf998aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: make freezing() test freeze conditions in effect instead of
 TIF_FREEZE

Using TIF_FREEZE for freezing worked when there was only single
freezing condition (the PM one); however, now there is also the
cgroup_freezer and single bit flag is getting clumsy.
thaw_processes() is already testing whether cgroup freezing in in
effect to avoid thawing tasks which were frozen by both PM and cgroup
freezers.

This is racy (nothing prevents race against cgroup freezing) and
fragile.  A much simpler way is to test actual freeze conditions from
freezing() - ie. directly test whether PM or cgroup freezing is in
effect.

This patch adds variables to indicate whether and what type of
freezing conditions are in effect and reimplements freezing() such
that it directly tests whether any of the two freezing conditions is
active and the task should freeze.  On fast path, freezing() is still
very cheap - it only tests system_freezing_cnt.

This makes the clumsy dancing aroung TIF_FREEZE unnecessary and
freeze/thaw operations more usual - updating state variables for the
new state and nudging target tasks so that they notice the new state
and comply.  As long as the nudging happens after state update, it's
race-free.

* This allows use of freezing() in freeze_task().  Replace the open
  coded tests with freezing().

* p != current test is added to warning printing conditions in
  try_to_freeze_tasks() failure path.  This is necessary as freezing()
  is now true for the task which initiated freezing too.

-v2: Oleg pointed out that re-freezing FROZEN cgroup could increment
     system_freezing_cnt.  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Paul Menage <paul@paulmenage.org>  (for the cgroup portions)

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index b2b4abc..8e29f2b 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -5,8 +5,13 @@
 
 #include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/atomic.h>
 
 #ifdef CONFIG_FREEZER
+extern atomic_t system_freezing_cnt;	/* nr of freezing conds in effect */
+extern bool pm_freezing;		/* PM freezing in effect */
+extern bool pm_nosig_freezing;		/* PM nosig freezing in effect */
+
 /*
  * Check if a process has been frozen
  */
@@ -15,28 +20,16 @@ static inline int frozen(struct task_struct *p)
 	return p->flags & PF_FROZEN;
 }
 
-/*
- * Check if there is a request to freeze a process
- */
-static inline int freezing(struct task_struct *p)
-{
-	return test_tsk_thread_flag(p, TIF_FREEZE);
-}
+extern bool freezing_slow_path(struct task_struct *p);
 
 /*
- * Request that a process be frozen
- */
-static inline void set_freeze_flag(struct task_struct *p)
-{
-	set_tsk_thread_flag(p, TIF_FREEZE);
-}
-
-/*
- * Sometimes we may need to cancel the previous 'freeze' request
+ * Check if there is a request to freeze a process
  */
-static inline void clear_freeze_flag(struct task_struct *p)
+static inline bool freezing(struct task_struct *p)
 {
-	clear_tsk_thread_flag(p, TIF_FREEZE);
+	if (likely(!atomic_read(&system_freezing_cnt)))
+		return false;
+	return freezing_slow_path(p);
 }
 
 static inline bool should_send_signal(struct task_struct *p)
@@ -174,9 +167,7 @@ static inline void set_freezable_with_signal(void)
 })
 #else /* !CONFIG_FREEZER */
 static inline int frozen(struct task_struct *p) { return 0; }
-static inline int freezing(struct task_struct *p) { return 0; }
-static inline void set_freeze_flag(struct task_struct *p) {}
-static inline void clear_freeze_flag(struct task_struct *p) {}
+static inline bool freezing(struct task_struct *p) { return false; }
 
 static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e6a1b8d..2327ad1 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -145,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
 static void freezer_destroy(struct cgroup_subsys *ss,
 			    struct cgroup *cgroup)
 {
-	kfree(cgroup_freezer(cgroup));
+	struct freezer *freezer = cgroup_freezer(cgroup);
+
+	if (freezer->state != CGROUP_THAWED)
+		atomic_dec(&system_freezing_cnt);
+	kfree(freezer);
 }
 
 /*
@@ -307,10 +311,14 @@ static int freezer_change_state(struct cgroup *cgroup,
 
 	switch (goal_state) {
 	case CGROUP_THAWED:
+		if (freezer->state != CGROUP_THAWED)
+			atomic_dec(&system_freezing_cnt);
 		freezer->state = CGROUP_THAWED;
 		unfreeze_cgroup(cgroup, freezer);
 		break;
 	case CGROUP_FROZEN:
+		if (freezer->state == CGROUP_THAWED)
+			atomic_inc(&system_freezing_cnt);
 		freezer->state = CGROUP_FREEZING;
 		retval = try_to_freeze_cgroup(cgroup, freezer);
 		break;
diff --git a/kernel/fork.c b/kernel/fork.c
index ba0d172..d53316e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -997,7 +997,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 	new_flags |= PF_FORKNOEXEC;
 	new_flags |= PF_STARTING;
 	p->flags = new_flags;
-	clear_freeze_flag(p);
 }
 
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 11e32d4..f53cd5a 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -11,9 +11,41 @@
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 
+/* total number of freezing conditions in effect */
+atomic_t system_freezing_cnt = ATOMIC_INIT(0);
+EXPORT_SYMBOL(system_freezing_cnt);
+
+/* indicate whether PM freezing is in effect, protected by pm_mutex */
+bool pm_freezing;
+bool pm_nosig_freezing;
+
 /* protects freezing and frozen transitions */
 static DEFINE_SPINLOCK(freezer_lock);
 
+/**
+ * freezing_slow_path - slow path for testing whether a task needs to be frozen
+ * @p: task to be tested
+ *
+ * This function is called by freezing() if system_freezing_cnt isn't zero
+ * and tests whether @p needs to enter and stay in frozen state.  Can be
+ * called under any context.  The freezers are responsible for ensuring the
+ * target tasks see the updated state.
+ */
+bool freezing_slow_path(struct task_struct *p)
+{
+	if (p->flags & PF_NOFREEZE)
+		return false;
+
+	if (pm_nosig_freezing || cgroup_freezing(p))
+		return true;
+
+	if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(freezing_slow_path);
+
 /* Refrigerator is place where frozen processes are stored :-). */
 bool __refrigerator(bool check_kthr_stop)
 {
@@ -23,17 +55,11 @@ bool __refrigerator(bool check_kthr_stop)
 	long save;
 
 	/*
-	 * Enter FROZEN.  If NOFREEZE, schedule immediate thawing by
-	 * clearing freezing.
+	 * No point in checking freezing() again - the caller already did.
+	 * Proceed to enter FROZEN.
 	 */
 	spin_lock_irq(&freezer_lock);
 repeat:
-	if (!freezing(current)) {
-		spin_unlock_irq(&freezer_lock);
-		return was_frozen;
-	}
-	if (current->flags & PF_NOFREEZE)
-		clear_freeze_flag(current);
 	current->flags |= PF_FROZEN;
 	spin_unlock_irq(&freezer_lock);
 
@@ -99,18 +125,12 @@ static void fake_signal_wake_up(struct task_struct *p)
 bool freeze_task(struct task_struct *p, bool sig_only)
 {
 	unsigned long flags;
-	bool ret = false;
 
 	spin_lock_irqsave(&freezer_lock, flags);
-
-	if ((p->flags & PF_NOFREEZE) ||
-	    (sig_only && !should_send_signal(p)))
-		goto out_unlock;
-
-	if (frozen(p))
-		goto out_unlock;
-
-	set_freeze_flag(p);
+	if (!freezing(p) || frozen(p)) {
+		spin_unlock_irqrestore(&freezer_lock, flags);
+		return false;
+	}
 
 	if (should_send_signal(p)) {
 		fake_signal_wake_up(p);
@@ -123,10 +143,9 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 	} else {
 		wake_up_state(p, TASK_INTERRUPTIBLE);
 	}
-	ret = true;
-out_unlock:
+
 	spin_unlock_irqrestore(&freezer_lock, flags);
-	return ret;
+	return true;
 }
 
 void __thaw_task(struct task_struct *p)
@@ -143,7 +162,6 @@ void __thaw_task(struct task_struct *p)
 	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
-	clear_freeze_flag(p);
 	if (frozen(p)) {
 		wake_up_process(p);
 	} else {
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 9f6f5c7..0beb51e 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -101,7 +101,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			if (!wakeup && !freezer_should_skip(p) &&
-			    freezing(p) && !frozen(p))
+			    p != current && freezing(p) && !frozen(p))
 				sched_show_task(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
@@ -122,7 +122,11 @@ int freeze_processes(void)
 {
 	int error;
 
+	if (!pm_freezing)
+		atomic_inc(&system_freezing_cnt);
+
 	printk("Freezing user space processes ... ");
+	pm_freezing = true;
 	error = try_to_freeze_tasks(true);
 	if (!error) {
 		printk("done.");
@@ -146,6 +150,7 @@ int freeze_kernel_threads(void)
 	int error;
 
 	printk("Freezing remaining freezable tasks ... ");
+	pm_nosig_freezing = true;
 	error = try_to_freeze_tasks(false);
 	if (!error)
 		printk("done.");
@@ -162,6 +167,11 @@ void thaw_processes(void)
 {
 	struct task_struct *g, *p;
 
+	if (pm_freezing)
+		atomic_dec(&system_freezing_cnt);
+	pm_freezing = false;
+	pm_nosig_freezing = false;
+
 	oom_killer_enable();
 
 	printk("Restarting tasks ... ");
@@ -170,9 +180,6 @@ void thaw_processes(void)
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
-		if (cgroup_freezing(p))
-			continue;
-
 		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
-- 
cgit v0.10.2


From d88e4cb67197d007fb778d62fe17360e970d5bfa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: remove now unused TIF_FREEZE

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-arch@vger.kernel.org

diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index ff73db0..28335bd 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -79,7 +79,6 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define TIF_UAC_SIGBUS		12	/* ! userspace part of 'osf_sysinfo' */
 #define TIF_MEMDIE		13	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	14	/* restore signal mask in do_signal */
-#define TIF_FREEZE		16	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -87,7 +86,6 @@ register struct thread_info *__current_thread_info __asm__("$8");
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 /* Work to do on interrupt/exception return.  */
 #define _TIF_WORK_MASK		(_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 7b5cc8d..0f30c3a 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -142,7 +142,6 @@ extern void vfp_flush_hwstate(struct thread_info *);
 #define TIF_POLLING_NRFLAG	16
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
-#define TIF_FREEZE		19
 #define TIF_RESTORE_SIGMASK	20
 #define TIF_SECCOMP		21
 
@@ -152,7 +151,6 @@ extern void vfp_flush_hwstate(struct thread_info *);
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 7a9c03d..e5deda4 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -85,7 +85,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTORE_SIGMASK	7	/* restore signal mask in do_signal */
 #define TIF_CPU_GOING_TO_SLEEP	8	/* CPU is entering sleep 0 mode */
 #define TIF_NOTIFY_RESUME	9	/* callback before returning to user */
-#define TIF_FREEZE		29
 #define TIF_DEBUG		30	/* debugging enabled */
 #define TIF_USERSPACE		31      /* true if FS sets userspace */
 
@@ -98,7 +97,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 /* Note: The masks below must never span more than 16 bits! */
 
diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h
index 02560fd..53ad100 100644
--- a/arch/blackfin/include/asm/thread_info.h
+++ b/arch/blackfin/include/asm/thread_info.h
@@ -100,7 +100,6 @@ static inline struct thread_info *current_thread_info(void)
 					   TIF_NEED_RESCHED */
 #define TIF_MEMDIE		4	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
-#define TIF_FREEZE		6	/* is freezing for suspend */
 #define TIF_IRQ_SYNC		7	/* sync pipeline stage */
 #define TIF_NOTIFY_RESUME	8	/* callback before returning to user */
 #define TIF_SINGLESTEP		9
@@ -111,7 +110,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_IRQ_SYNC		(1<<TIF_IRQ_SYNC)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 332f19c..29b9288 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -86,7 +86,6 @@ struct thread_info {
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17	/* is terminating due to OOM killer */
-#define TIF_FREEZE		18	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -94,7 +93,6 @@ struct thread_info {
 #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index cefbe73..92d83ea 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -111,7 +111,6 @@ register struct thread_info *__current_thread_info asm("gr15");
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17	/* is terminating due to OOM killer */
-#define TIF_FREEZE		18	/* freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -120,7 +119,6 @@ register struct thread_info *__current_thread_info asm("gr15");
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index d6f1784..9c126e0 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -90,7 +90,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		4	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
 #define TIF_NOTIFY_RESUME	6	/* callback before returning to user */
-#define TIF_FREEZE		16	/* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -99,7 +98,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index ff0cc84..e054bcc 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -113,7 +113,6 @@ struct thread_info {
 #define TIF_MEMDIE		17	/* is terminating due to OOM killer */
 #define TIF_MCA_INIT		18	/* this task is processing MCA or INIT */
 #define TIF_DB_DISABLED		19	/* debug trap disabled for fsyscall */
-#define TIF_FREEZE		20	/* is freezing for suspend */
 #define TIF_RESTORE_RSE		21	/* user RBS is newer than kernel RBS */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
@@ -126,7 +125,6 @@ struct thread_info {
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_MCA_INIT		(1 << TIF_MCA_INIT)
 #define _TIF_DB_DISABLED	(1 << TIF_DB_DISABLED)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 #define _TIF_RESTORE_RSE	(1 << TIF_RESTORE_RSE)
 
 /* "work to do on user-return" bits */
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index 0227dba..bf8fa3c 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -138,7 +138,6 @@ static inline unsigned int get_thread_fault_code(void)
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
-#define TIF_FREEZE		19	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -149,7 +148,6 @@ static inline unsigned int get_thread_fault_code(void)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 7909889..294df15 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -103,7 +103,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_DELAYED_TRACE	14	/* single step a syscall */
 #define TIF_SYSCALL_TRACE	15	/* syscall trace active */
 #define TIF_MEMDIE		16	/* is terminating due to OOM killer */
-#define TIF_FREEZE		17	/* thread is freezing for suspend */
 #define TIF_RESTORE_SIGMASK	18	/* restore signal mask in do_signal */
 
 #endif	/* _ASM_M68K_THREAD_INFO_H */
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index b73da2a..1a8ab6a 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -125,7 +125,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		6	/* is terminating due to OOM killer */
 #define TIF_SYSCALL_AUDIT	9       /* syscall auditing active */
 #define TIF_SECCOMP		10      /* secure computing */
-#define TIF_FREEZE		14	/* Freezing for suspend */
 
 /* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_POLLING_NRFLAG	16
@@ -137,7 +136,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
 
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 97f8bf6..0d85d8e 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -117,7 +117,6 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define TIF_USEDFPU		16	/* FPU was used by this task this quantum (SMP) */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
-#define TIF_FREEZE		19
 #define TIF_FIXADE		20	/* Fix address errors in software */
 #define TIF_LOGADE		21	/* Log address errors to syslog */
 #define TIF_32BIT_REGS		22	/* also implies 16/32 fprs */
@@ -141,7 +140,6 @@ register struct thread_info *__current_thread_info __asm__("$28");
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_USEDFPU		(1<<TIF_USEDFPU)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_FIXADE		(1<<TIF_FIXADE)
 #define _TIF_LOGADE		(1<<TIF_LOGADE)
 #define _TIF_32BIT_REGS		(1<<TIF_32BIT_REGS)
diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h
index 87c2130..28cf521 100644
--- a/arch/mn10300/include/asm/thread_info.h
+++ b/arch/mn10300/include/asm/thread_info.h
@@ -165,7 +165,6 @@ extern void free_thread_info(struct thread_info *);
 #define TIF_RESTORE_SIGMASK	5	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		17	/* is terminating due to OOM killer */
-#define TIF_FREEZE		18	/* freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	+(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	+(1 << TIF_NOTIFY_RESUME)
@@ -174,7 +173,6 @@ extern void free_thread_info(struct thread_info *);
 #define _TIF_SINGLESTEP		+(1 << TIF_SINGLESTEP)
 #define _TIF_RESTORE_SIGMASK	+(1 << TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG	+(1 << TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		+(1 << TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index aa8de72..6d9c7c7 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -58,7 +58,6 @@ struct thread_info {
 #define TIF_32BIT               4       /* 32 bit binary */
 #define TIF_MEMDIE		5	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	6	/* restore saved signal mask */
-#define TIF_FREEZE		7	/* is freezing for suspend */
 #define TIF_NOTIFY_RESUME	8	/* callback before returning to user */
 #define TIF_SINGLESTEP		9	/* single stepping? */
 #define TIF_BLOCKSTEP		10	/* branch stepping? */
@@ -69,7 +68,6 @@ struct thread_info {
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 836f231..96471494 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -109,7 +109,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_RESTOREALL		11	/* Restore all regs (implies NOERROR) */
 #define TIF_NOERROR		12	/* Force successful syscall return */
 #define TIF_NOTIFY_RESUME	13	/* callback before returning to user */
-#define TIF_FREEZE		14	/* Freezing for suspend */
 #define TIF_SYSCALL_TRACEPOINT	15	/* syscall tracepoint instrumentation */
 #define TIF_RUNLATCH		16	/* Is the runlatch enabled? */
 
@@ -127,7 +126,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_RESTOREALL		(1<<TIF_RESTOREALL)
 #define _TIF_NOERROR		(1<<TIF_NOERROR)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 #define _TIF_RUNLATCH		(1<<TIF_RUNLATCH)
 #define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a231834..a730381 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -102,7 +102,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	19	/* restore signal mask in do_signal() */
 #define TIF_SINGLE_STEP		20	/* This task is single stepped */
-#define TIF_FREEZE		21	/* thread is freezing for suspend */
 
 #define _TIF_SYSCALL		(1<<TIF_SYSCALL)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -119,7 +118,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_31BIT		(1<<TIF_31BIT)
 #define _TIF_SINGLE_STEP	(1<<TIF_SINGLE_STEP)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #ifdef CONFIG_64BIT
 #define is_32bit_task()		(test_thread_flag(TIF_31BIT))
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index ea2d508..20ee40a 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -122,7 +122,6 @@ extern void init_thread_xstate(void);
 #define TIF_SYSCALL_TRACEPOINT	8	/* for ftrace syscall instrumentation */
 #define TIF_POLLING_NRFLAG	17	/* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
-#define TIF_FREEZE		19	/* Freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -133,7 +132,6 @@ extern void init_thread_xstate(void);
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 /*
  * _TIF_ALLWORK_MASK and _TIF_WORK_MASK need to fit within 2 bytes, or we
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index fa57532..5cc5888 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -133,7 +133,6 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define TIF_POLLING_NRFLAG	9	/* true if poll_idle() is polling
 					 * TIF_NEED_RESCHED */
 #define TIF_MEMDIE		10	/* is terminating due to OOM killer */
-#define TIF_FREEZE		11	/* is freezing for suspend */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -147,7 +146,6 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
 #define _TIF_DO_NOTIFY_RESUME_MASK	(_TIF_NOTIFY_RESUME | \
 					 _TIF_SIGPENDING | \
 					 _TIF_RESTORE_SIGMASK)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 60d86be..01d057f 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -225,7 +225,6 @@ register struct thread_info *current_thread_info_reg asm("g6");
 /* flag bit 12 is available */
 #define TIF_MEMDIE		13	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	14
-#define TIF_FREEZE		15	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -237,7 +236,6 @@ register struct thread_info *current_thread_info_reg asm("g6");
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_USER_WORK_MASK	((0xff << TI_FLAG_WSAVED_SHIFT) | \
 				 _TIF_DO_NOTIFY_RESUME_MASK | \
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index 5bd1bad..200c4ab 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -71,7 +71,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		5	/* is terminating due to OOM killer */
 #define TIF_SYSCALL_AUDIT	6
 #define TIF_RESTORE_SIGMASK	7
-#define TIF_FREEZE		16	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -80,6 +79,5 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 
 #endif
diff --git a/arch/unicore32/include/asm/thread_info.h b/arch/unicore32/include/asm/thread_info.h
index c270e9e..89f7557 100644
--- a/arch/unicore32/include/asm/thread_info.h
+++ b/arch/unicore32/include/asm/thread_info.h
@@ -135,14 +135,12 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
 #define TIF_SYSCALL_TRACE	8
 #define TIF_MEMDIE		18
-#define TIF_FREEZE		19
 #define TIF_RESTORE_SIGMASK	20
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
 
 /*
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1fe5c1..32125af 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -90,7 +90,6 @@ struct thread_info {
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_DEBUG		21	/* uses debug registers */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
-#define TIF_FREEZE		23	/* is freezing for suspend */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
@@ -112,7 +111,6 @@ struct thread_info {
 #define _TIF_FORK		(1 << TIF_FORK)
 #define _TIF_DEBUG		(1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
-#define _TIF_FREEZE		(1 << TIF_FREEZE)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 7be8acc..6abbedd 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -132,7 +132,6 @@ static inline struct thread_info *current_thread_info(void)
 #define TIF_MEMDIE		5	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	6	/* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
-#define TIF_FREEZE		17	/* is freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SIGPENDING		(1<<TIF_SIGPENDING)
@@ -141,7 +140,6 @@ static inline struct thread_info *current_thread_info(void)
 #define _TIF_IRET		(1<<TIF_IRET)
 #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
-#define _TIF_FREEZE		(1<<TIF_FREEZE)
 
 #define _TIF_WORK_MASK		0x0000FFFE	/* work to do on interrupt/exception return */
 #define _TIF_ALLWORK_MASK	0x0000FFFF	/* work to do on any return to u-space */
-- 
cgit v0.10.2


From 948246f70a811c872b9d93bb4a8ab5823c4c79e0 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: remove should_send_signal() and update frozen()

should_send_signal() is only used in freezer.c.  Exporting them only
increases chance of abuse.  Open code the two users and remove it.

Update frozen() to return bool.

Signed-off-by: Tejun Heo <tj@kernel.org>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 8e29f2b..3d50913 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -15,7 +15,7 @@ extern bool pm_nosig_freezing;		/* PM nosig freezing in effect */
 /*
  * Check if a process has been frozen
  */
-static inline int frozen(struct task_struct *p)
+static inline bool frozen(struct task_struct *p)
 {
 	return p->flags & PF_FROZEN;
 }
@@ -32,11 +32,6 @@ static inline bool freezing(struct task_struct *p)
 	return freezing_slow_path(p);
 }
 
-static inline bool should_send_signal(struct task_struct *p)
-{
-	return !(p->flags & PF_FREEZER_NOSIG);
-}
-
 /* Takes and releases task alloc lock using task_lock() */
 extern void __thaw_task(struct task_struct *t);
 
@@ -166,7 +161,7 @@ static inline void set_freezable_with_signal(void)
 	__retval;							\
 })
 #else /* !CONFIG_FREEZER */
-static inline int frozen(struct task_struct *p) { return 0; }
+static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
 
 static inline bool __refrigerator(bool check_kthr_stop) { return false; }
diff --git a/kernel/freezer.c b/kernel/freezer.c
index f53cd5a..95a1238 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -132,7 +132,7 @@ bool freeze_task(struct task_struct *p, bool sig_only)
 		return false;
 	}
 
-	if (should_send_signal(p)) {
+	if (!(p->flags & PF_FREEZER_NOSIG)) {
 		fake_signal_wake_up(p);
 		/*
 		 * fake_signal_wake_up() goes through p's scheduler
-- 
cgit v0.10.2


From 96ee6d8539c9fc6742908d85eb9723abb5c91854 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:25 -0800
Subject: freezer: fix set_freezable[_with_signal]() race

A kthread doing set_freezable*() may race with on-going PM freeze and
the freezer might think all tasks are frozen while the new freezable
kthread is merrily proceeding to execute code paths which aren't
supposed to be executing during PM freeze.

Reimplement set_freezable[_with_signal]() using __set_freezable() such
that freezable PF flags are modified under freezer_lock and
try_to_freeze() is called afterwards.  This eliminates race condition
against freezing.

Note: Separated out from larger patch to resolve fix order dependency
      Oleg pointed out.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 3d50913..a0f1b3a 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -49,6 +49,7 @@ static inline bool try_to_freeze(void)
 }
 
 extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern bool __set_freezable(bool with_signal);
 
 #ifdef CONFIG_CGROUP_FREEZER
 extern bool cgroup_freezing(struct task_struct *task);
@@ -106,18 +107,18 @@ static inline int freezer_should_skip(struct task_struct *p)
 /*
  * Tell the freezer that the current task should be frozen by it
  */
-static inline void set_freezable(void)
+static inline bool set_freezable(void)
 {
-	current->flags &= ~PF_NOFREEZE;
+	return __set_freezable(false);
 }
 
 /*
  * Tell the freezer that the current task should be frozen by it and that it
  * should send a fake signal to the task to freeze it.
  */
-static inline void set_freezable_with_signal(void)
+static inline bool set_freezable_with_signal(void)
 {
-	current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG);
+	return __set_freezable(true);
 }
 
 /*
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 95a1238..b1e7a7b 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -171,3 +171,28 @@ void __thaw_task(struct task_struct *p)
 	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
+
+/**
+ * __set_freezable - make %current freezable
+ * @with_signal: do we want %TIF_SIGPENDING for notification too?
+ *
+ * Mark %current freezable and enter refrigerator if necessary.
+ */
+bool __set_freezable(bool with_signal)
+{
+	might_sleep();
+
+	/*
+	 * Modify flags while holding freezer_lock.  This ensures the
+	 * freezer notices that we aren't frozen yet or the freezing
+	 * condition is visible to try_to_freeze() below.
+	 */
+	spin_lock_irq(&freezer_lock);
+	current->flags &= ~PF_NOFREEZE;
+	if (with_signal)
+		current->flags &= ~PF_FREEZER_NOSIG;
+	spin_unlock_irq(&freezer_lock);
+
+	return try_to_freeze();
+}
+EXPORT_SYMBOL(__set_freezable);
-- 
cgit v0.10.2


From 5ece3eae4cdb968c269e0bc7e2c0e2b223552025 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:26 -0800
Subject: freezer: restructure __refrigerator()

If another freeze happens before all tasks leave FROZEN state after
being thawed, the freezer can see the existing FROZEN and consider the
tasks to be frozen but they can clear FROZEN without checking the new
freezing().

Oleg suggested restructuring __refrigerator() such that there's single
condition check section inside freezer_lock and sigpending is cleared
afterwards, which fixes the problem and simplifies the code.
Restructure accordingly.

-v2: Frozen loop exited without releasing freezer_lock.  Fixed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>

diff --git a/kernel/freezer.c b/kernel/freezer.c
index b1e7a7b..c3496c6 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -52,39 +52,29 @@ bool __refrigerator(bool check_kthr_stop)
 	/* Hmm, should we be allowed to suspend when there are realtime
 	   processes around? */
 	bool was_frozen = false;
-	long save;
+	long save = current->state;
 
-	/*
-	 * No point in checking freezing() again - the caller already did.
-	 * Proceed to enter FROZEN.
-	 */
-	spin_lock_irq(&freezer_lock);
-repeat:
-	current->flags |= PF_FROZEN;
-	spin_unlock_irq(&freezer_lock);
-
-	save = current->state;
 	pr_debug("%s entered refrigerator\n", current->comm);
 
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
+
+		spin_lock_irq(&freezer_lock);
+		current->flags |= PF_FROZEN;
 		if (!freezing(current) ||
 		    (check_kthr_stop && kthread_should_stop()))
+			current->flags &= ~PF_FROZEN;
+		spin_unlock_irq(&freezer_lock);
+
+		if (!(current->flags & PF_FROZEN))
 			break;
 		was_frozen = true;
 		schedule();
 	}
 
-	/* leave FROZEN */
-	spin_lock_irq(&freezer_lock);
-	if (freezing(current))
-		goto repeat;
-	current->flags &= ~PF_FROZEN;
-	spin_unlock_irq(&freezer_lock);
+	spin_lock_irq(&current->sighand->siglock);
+	recalc_sigpending(); /* We sent fake signal, clean it up */
+	spin_unlock_irq(&current->sighand->siglock);
 
 	pr_debug("%s left refrigerator\n", current->comm);
 
-- 
cgit v0.10.2


From 37ad8aca94a1da2112a7c56151390914e80d1113 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:26 -0800
Subject: freezer: use lock_task_sighand() in fake_signal_wake_up()

cgroup_freezer calls freeze_task() without holding tasklist_lock and,
if the task is exiting, its ->sighand may be gone by the time
fake_signal_wake_up() is called.  Use lock_task_sighand() instead of
accessing ->sighand directly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Paul Menage <paul@paulmenage.org>

diff --git a/kernel/freezer.c b/kernel/freezer.c
index c3496c6..389549f 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -93,9 +93,10 @@ static void fake_signal_wake_up(struct task_struct *p)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&p->sighand->siglock, flags);
-	signal_wake_up(p, 0);
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	if (lock_task_sighand(p, &flags)) {
+		signal_wake_up(p, 0);
+		unlock_task_sighand(p, &flags);
+	}
 }
 
 /**
-- 
cgit v0.10.2


From 839e3407d90a810318d17c17ceb3d5928a910704 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:26 -0800
Subject: freezer: remove unused @sig_only from freeze_task()

After "freezer: make freezing() test freeze conditions in effect
instead of TIF_FREEZE", freezing() returns authoritative answer on
whether the current task should freeze or not and freeze_task()
doesn't need or use @sig_only.  Remove it.

While at it, rewrite function comment for freeze_task() and rename
@sig_only to @user_only in try_to_freeze_tasks().

This patch doesn't cause any functional change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index a0f1b3a..a28842e 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -48,7 +48,7 @@ static inline bool try_to_freeze(void)
 	return __refrigerator(false);
 }
 
-extern bool freeze_task(struct task_struct *p, bool sig_only);
+extern bool freeze_task(struct task_struct *p);
 extern bool __set_freezable(bool with_signal);
 
 #ifdef CONFIG_CGROUP_FREEZER
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 2327ad1..e411a60 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -206,7 +206,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
 
 	/* Locking avoids race with FREEZING -> THAWED transitions. */
 	if (freezer->state == CGROUP_FREEZING)
-		freeze_task(task, true);
+		freeze_task(task);
 	spin_unlock_irq(&freezer->lock);
 }
 
@@ -274,7 +274,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
 
 	cgroup_iter_start(cgroup, &it);
 	while ((task = cgroup_iter_next(cgroup, &it))) {
-		if (!freeze_task(task, true))
+		if (!freeze_task(task))
 			continue;
 		if (frozen(task))
 			continue;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 389549f..2589a61 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -100,20 +100,17 @@ static void fake_signal_wake_up(struct task_struct *p)
 }
 
 /**
- *	freeze_task - send a freeze request to given task
- *	@p: task to send the request to
- *	@sig_only: if set, the request will only be sent if the task has the
- *		PF_FREEZER_NOSIG flag unset
- *	Return value: 'false', if @sig_only is set and the task has
- *		PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
+ * freeze_task - send a freeze request to given task
+ * @p: task to send the request to
  *
- *	The freeze request is sent by setting the tasks's TIF_FREEZE flag and
- *	either sending a fake signal to it or waking it up, depending on whether
- *	or not it has PF_FREEZER_NOSIG set.  If @sig_only is set and the task
- *	has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
- *	TIF_FREEZE flag will not be set.
+ * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE
+ * flag and either sending a fake signal to it or waking it up, depending
+ * on whether it has %PF_FREEZER_NOSIG set.
+ *
+ * RETURNS:
+ * %false, if @p is not freezing or already frozen; %true, otherwise
  */
-bool freeze_task(struct task_struct *p, bool sig_only)
+bool freeze_task(struct task_struct *p)
 {
 	unsigned long flags;
 
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0beb51e..77274c9 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -22,7 +22,7 @@
  */
 #define TIMEOUT	(20 * HZ)
 
-static int try_to_freeze_tasks(bool sig_only)
+static int try_to_freeze_tasks(bool user_only)
 {
 	struct task_struct *g, *p;
 	unsigned long end_time;
@@ -37,14 +37,14 @@ static int try_to_freeze_tasks(bool sig_only)
 
 	end_time = jiffies + TIMEOUT;
 
-	if (!sig_only)
+	if (!user_only)
 		freeze_workqueues_begin();
 
 	while (true) {
 		todo = 0;
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
-			if (p == current || !freeze_task(p, sig_only))
+			if (p == current || !freeze_task(p))
 				continue;
 
 			/*
@@ -65,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only)
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 
-		if (!sig_only) {
+		if (!user_only) {
 			wq_busy = freeze_workqueues_busy();
 			todo += wq_busy;
 		}
-- 
cgit v0.10.2


From ec012476af73a1a8a82565a915e9b48c2e337878 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 21 Nov 2011 12:32:26 -0800
Subject: usb_storage: don't use set_freezable_with_signal()

The current implementation of set_freezable_with_signal() is buggy and
tricky to get right.  usb-storage is the only user and its use can be
avoided trivially.

All usb-storage wants is to be able to sleep with timeout and get
woken up if freezing() becomes true.  This can be trivially
implemented by doing interruptible wait w/ freezing() included in the
wait condition.  There's no reason to use set_freezable_with_signal().

Perform interruptible wait on freezing() instead of using
set_freezable_with_signal(), which is scheduled for removal.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Cc: Seth Forshee <seth.forshee@canonical.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index c325e69..aa84b3d 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -831,7 +831,8 @@ static int usb_stor_scan_thread(void * __us)
 
 	dev_dbg(dev, "device found\n");
 
-	set_freezable_with_signal();
+	set_freezable();
+
 	/*
 	 * Wait for the timeout to expire or for a disconnect
 	 *
@@ -839,16 +840,16 @@ static int usb_stor_scan_thread(void * __us)
 	 * fail to freeze, but we can't be non-freezable either. Nor can
 	 * khubd freeze while waiting for scanning to complete as it may
 	 * hold the device lock, causing a hang when suspending devices.
-	 * So we request a fake signal when freezing and use
-	 * interruptible sleep to kick us out of our wait early when
-	 * freezing happens.
+	 * So instead of using wait_event_freezable(), explicitly test
+	 * for (DONT_SCAN || freezing) in interruptible wait and proceed
+	 * if any of DONT_SCAN, freezing or timeout has happened.
 	 */
 	if (delay_use > 0) {
 		dev_dbg(dev, "waiting for device to settle "
 				"before scanning\n");
 		wait_event_interruptible_timeout(us->delay_wait,
-				test_bit(US_FLIDX_DONT_SCAN, &us->dflags),
-				delay_use * HZ);
+				test_bit(US_FLIDX_DONT_SCAN, &us->dflags) ||
+				freezing(current), delay_use * HZ);
 	}
 
 	/* If the device is still connected, perform the scanning */
-- 
cgit v0.10.2


From adfa543e7314b36ac55a40019977de6e47946dd7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Nov 2011 09:28:16 -0800
Subject: dmatest: don't use set_freezable_with_signal()

Commit 981ed70d8e (dmatest: make dmatest threads freezable) made
dmatest kthread use set_freezable_with_signal(); however, the
interface is scheduled to be removed in the next merge window.

The problem is that unlike userland tasks there's no default place
which handles signal pending state and it isn't clear who owns and/or
is responsible for clearing TIF_SIGPENDING.  For example, in the
current code, try_to_freeze() clears TIF_SIGPENDING but it isn't sure
whether it actually owns the TIF_SIGPENDING nor is it race-free -
ie. the task may continue to run with TIF_SIGPENDING set after the
freezable section.

Unfortunately, we don't have wait_for_completion_freezable_timeout().
This patch open codes it and uses wait_event_freezable_timeout()
instead and removes timeout reloading - wait_event_freezable_timeout()
won't return across freezing events (currently racy but fix scheduled)
and timer doesn't decrement while the task is in freezer.  Although
this does lose timer-reset-over-freezing, given that timeout is
supposed to be long enough and failure to finish inside is considered
irrecoverable, I don't think this is worth the complexity.

While at it, move completion to outer scope and explain that we're
ignoring dangling pointer problem after timeout.  This should give
slightly better chance at avoiding oops after timeout.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Cc: Nicolas Ferre <nicolas.ferre@atmel.com>

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index eb1d864..2b8661b 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -214,9 +214,18 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
 	return error_count;
 }
 
-static void dmatest_callback(void *completion)
+/* poor man's completion - we want to use wait_event_freezable() on it */
+struct dmatest_done {
+	bool			done;
+	wait_queue_head_t	*wait;
+};
+
+static void dmatest_callback(void *arg)
 {
-	complete(completion);
+	struct dmatest_done *done = arg;
+
+	done->done = true;
+	wake_up_all(done->wait);
 }
 
 /*
@@ -235,7 +244,9 @@ static void dmatest_callback(void *completion)
  */
 static int dmatest_func(void *data)
 {
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
 	struct dmatest_thread	*thread = data;
+	struct dmatest_done	done = { .wait = &done_wait };
 	struct dma_chan		*chan;
 	const char		*thread_name;
 	unsigned int		src_off, dst_off, len;
@@ -252,7 +263,7 @@ static int dmatest_func(void *data)
 	int			i;
 
 	thread_name = current->comm;
-	set_freezable_with_signal();
+	set_freezable();
 
 	ret = -ENOMEM;
 
@@ -306,9 +317,6 @@ static int dmatest_func(void *data)
 		struct dma_async_tx_descriptor *tx = NULL;
 		dma_addr_t dma_srcs[src_cnt];
 		dma_addr_t dma_dsts[dst_cnt];
-		struct completion cmp;
-		unsigned long start, tmo, end = 0 /* compiler... */;
-		bool reload = true;
 		u8 align = 0;
 
 		total_tests++;
@@ -391,9 +399,9 @@ static int dmatest_func(void *data)
 			continue;
 		}
 
-		init_completion(&cmp);
+		done.done = false;
 		tx->callback = dmatest_callback;
-		tx->callback_param = &cmp;
+		tx->callback_param = &done;
 		cookie = tx->tx_submit(tx);
 
 		if (dma_submit_error(cookie)) {
@@ -407,20 +415,20 @@ static int dmatest_func(void *data)
 		}
 		dma_async_issue_pending(chan);
 
-		do {
-			start = jiffies;
-			if (reload)
-				end = start + msecs_to_jiffies(timeout);
-			else if (end <= start)
-				end = start + 1;
-			tmo = wait_for_completion_interruptible_timeout(&cmp,
-								end - start);
-			reload = try_to_freeze();
-		} while (tmo == -ERESTARTSYS);
+		wait_event_freezable_timeout(done_wait, done.done,
+					     msecs_to_jiffies(timeout));
 
 		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
 
-		if (tmo == 0) {
+		if (!done.done) {
+			/*
+			 * We're leaving the timed out dma operation with
+			 * dangling pointer to done_wait.  To make this
+			 * correct, we'll need to allocate wait_done for
+			 * each test iteration and perform "who's gonna
+			 * free it this time?" dancing.  For now, just
+			 * leave it dangling.
+			 */
 			pr_warning("%s: #%u: test timed out\n",
 				   thread_name, total_tests - 1);
 			failed_tests++;
-- 
cgit v0.10.2


From 34b087e48367c252e343c2f8de65676a78af1e4a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Nov 2011 09:28:17 -0800
Subject: freezer: kill unused set_freezable_with_signal()

There's no in-kernel user of set_freezable_with_signal() left.  Mixing
TIF_SIGPENDING with kernel threads can lead to nasty corner cases as
kernel threads never travel signal delivery path on their own.

e.g. the current implementation is buggy in the cancelation path of
__thaw_task().  It calls recalc_sigpending_and_wake() in an attempt to
clear TIF_SIGPENDING but the function never clears it regardless of
sigpending state.  This means that signallable freezable kthreads may
continue executing with !freezing() && stuck TIF_SIGPENDING, which can
be troublesome.

This patch removes set_freezable_with_signal() along with
PF_FREEZER_NOSIG and recalc_sigpending*() calls in freezer.  User
tasks get TIF_SIGPENDING, kernel tasks get woken up and the spurious
sigpending is dealt with in the usual signal delivery path.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index a28842e..a33550f 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -49,7 +49,7 @@ static inline bool try_to_freeze(void)
 }
 
 extern bool freeze_task(struct task_struct *p);
-extern bool __set_freezable(bool with_signal);
+extern bool set_freezable(void);
 
 #ifdef CONFIG_CGROUP_FREEZER
 extern bool cgroup_freezing(struct task_struct *task);
@@ -105,23 +105,6 @@ static inline int freezer_should_skip(struct task_struct *p)
 }
 
 /*
- * Tell the freezer that the current task should be frozen by it
- */
-static inline bool set_freezable(void)
-{
-	return __set_freezable(false);
-}
-
-/*
- * Tell the freezer that the current task should be frozen by it and that it
- * should send a fake signal to the task to freeze it.
- */
-static inline bool set_freezable_with_signal(void)
-{
-	return __set_freezable(true);
-}
-
-/*
  * Freezer-friendly wrappers around wait_event_interruptible(),
  * wait_event_killable() and wait_event_interruptible_timeout(), originally
  * defined in <linux/wait.h>
@@ -176,7 +159,6 @@ static inline void freezer_do_not_count(void) {}
 static inline void freezer_count(void) {}
 static inline int freezer_should_skip(struct task_struct *p) { return 0; }
 static inline void set_freezable(void) {}
-static inline void set_freezable_with_signal(void) {}
 
 #define wait_event_freezable(wq, condition)				\
 		wait_event_interruptible(wq, condition)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d12bd03..2f90470 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1788,7 +1788,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezable */
-#define PF_FREEZER_NOSIG 0x80000000	/* Freezer won't send signals to it */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 2589a61..9815b8d 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -39,7 +39,7 @@ bool freezing_slow_path(struct task_struct *p)
 	if (pm_nosig_freezing || cgroup_freezing(p))
 		return true;
 
-	if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG))
+	if (pm_freezing && !(p->flags & PF_KTHREAD))
 		return true;
 
 	return false;
@@ -72,10 +72,6 @@ bool __refrigerator(bool check_kthr_stop)
 		schedule();
 	}
 
-	spin_lock_irq(&current->sighand->siglock);
-	recalc_sigpending(); /* We sent fake signal, clean it up */
-	spin_unlock_irq(&current->sighand->siglock);
-
 	pr_debug("%s left refrigerator\n", current->comm);
 
 	/*
@@ -120,7 +116,7 @@ bool freeze_task(struct task_struct *p)
 		return false;
 	}
 
-	if (!(p->flags & PF_FREEZER_NOSIG)) {
+	if (!(p->flags & PF_KTHREAD)) {
 		fake_signal_wake_up(p);
 		/*
 		 * fake_signal_wake_up() goes through p's scheduler
@@ -145,28 +141,19 @@ void __thaw_task(struct task_struct *p)
 	 * be visible to @p as waking up implies wmb.  Waking up inside
 	 * freezer_lock also prevents wakeups from leaking outside
 	 * refrigerator.
-	 *
-	 * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to
-	 * avoid leaving dangling TIF_SIGPENDING behind.
 	 */
 	spin_lock_irqsave(&freezer_lock, flags);
-	if (frozen(p)) {
+	if (frozen(p))
 		wake_up_process(p);
-	} else {
-		spin_lock(&p->sighand->siglock);
-		recalc_sigpending_and_wake(p);
-		spin_unlock(&p->sighand->siglock);
-	}
 	spin_unlock_irqrestore(&freezer_lock, flags);
 }
 
 /**
- * __set_freezable - make %current freezable
- * @with_signal: do we want %TIF_SIGPENDING for notification too?
+ * set_freezable - make %current freezable
  *
  * Mark %current freezable and enter refrigerator if necessary.
  */
-bool __set_freezable(bool with_signal)
+bool set_freezable(void)
 {
 	might_sleep();
 
@@ -177,10 +164,8 @@ bool __set_freezable(bool with_signal)
 	 */
 	spin_lock_irq(&freezer_lock);
 	current->flags &= ~PF_NOFREEZE;
-	if (with_signal)
-		current->flags &= ~PF_FREEZER_NOSIG;
 	spin_unlock_irq(&freezer_lock);
 
 	return try_to_freeze();
 }
-EXPORT_SYMBOL(__set_freezable);
+EXPORT_SYMBOL(set_freezable);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 1c36dea..3d3de633 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -282,7 +282,7 @@ int kthreadd(void *unused)
 	set_cpus_allowed_ptr(tsk, cpu_all_mask);
 	set_mems_allowed(node_states[N_HIGH_MEMORY]);
 
-	current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
+	current->flags |= PF_NOFREEZE;
 
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
-- 
cgit v0.10.2


From 24b7ead3fb0bae267c2ee50898eb4c13aedd1e9f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 23 Nov 2011 09:28:17 -0800
Subject: freezer: fix wait_event_freezable/__thaw_task races

wait_event_freezable() and friends stop the waiting if try_to_freeze()
fails. This is not right, we can race with __thaw_task() and in this
case

	- wait_event_freezable() returns the wrong ERESTARTSYS

	- wait_event_freezable_timeout() can return the positive
	  value while condition == F

Change the code to always check __retval/condition before return.

Note: with or without this patch the timeout logic looks strange,
probably we should recalc timeout if try_to_freeze() returns T.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Tejun Heo <tj@kernel.org>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index a33550f..09570ac 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -122,28 +122,30 @@ static inline int freezer_should_skip(struct task_struct *p)
 #define wait_event_freezable(wq, condition)				\
 ({									\
 	int __retval;							\
-	do {								\
+	for (;;) {							\
 		__retval = wait_event_interruptible(wq, 		\
 				(condition) || freezing(current));	\
-		if (__retval && !freezing(current))			\
+		if (__retval || (condition))				\
 			break;						\
-		else if (!(condition))					\
-			__retval = -ERESTARTSYS;			\
-	} while (try_to_freeze());					\
+		try_to_freeze();					\
+	}								\
 	__retval;							\
 })
 
-
 #define wait_event_freezable_timeout(wq, condition, timeout)		\
 ({									\
 	long __retval = timeout;					\
-	do {								\
+	for (;;) {							\
 		__retval = wait_event_interruptible_timeout(wq,		\
 				(condition) || freezing(current),	\
 				__retval); 				\
-	} while (try_to_freeze());					\
+		if (__retval <= 0 || (condition))			\
+			break;						\
+		try_to_freeze();					\
+	}								\
 	__retval;							\
 })
+
 #else /* !CONFIG_FREEZER */
 static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
-- 
cgit v0.10.2


From 341d4166175e9b7911444f5a33b1c9efb8f15c85 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 19 Nov 2011 14:39:01 +0100
Subject: PM: Fix indentation and remove extraneous whitespaces in
 kernel/power/main.c

Lack of proper indentation of the goto statement decreases the readability
of code significantly. In fact, this made me look twice at the code to check
whether it really does what it should be doing. Fix this.

And in the same file, there are some extra whitespaces. Get rid of them too.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/kernel/power/main.c b/kernel/power/main.c
index 36e0f09..7d36fb3 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2003 Patrick Mochel
  * Copyright (c) 2003 Open Source Development Lab
- * 
+ *
  * This file is released under the GPLv2
  *
  */
@@ -240,7 +240,7 @@ struct kobject *power_kobj;
  *	'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and
  *	'disk' (Suspend-to-Disk).
  *
- *	store() accepts one of those strings, translates it into the 
+ *	store() accepts one of those strings, translates it into the
  *	proper enumerated value, and initiates a suspend transition.
  */
 static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
@@ -282,7 +282,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
 	/* First, check if we are requested to hibernate */
 	if (len == 4 && !strncmp(buf, "disk", len)) {
 		error = hibernate();
-  goto Exit;
+		goto Exit;
 	}
 
 #ifdef CONFIG_SUSPEND
-- 
cgit v0.10.2


From 6a76b7a9cc93dec6ae58d70f1257d234291908e0 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Mon, 21 Nov 2011 23:32:35 +0100
Subject: PM / Memory-hotplug: Avoid task freezing failures

The lock_system_sleep() function is used in the memory hotplug code at
several places in order to implement mutual exclusion with hibernation.
However, this function tries to acquire the 'pm_mutex' lock using
mutex_lock() and hence blocks in TASK_UNINTERRUPTIBLE state if it doesn't
get the lock. This would lead to task freezing failures and hence
hibernation failure as a consequence, even though the hibernation call path
successfully acquired the lock.

But it is to be noted that, since this task tries to acquire pm_mutex, if it
blocks due to this, we are *100% sure* that this task is not going to run
as long as hibernation sequence is in progress, since hibernation releases
'pm_mutex' only at the very end, when everything is done.
And this means, this task is going to be anyway blocked for much more longer
than what the freezer intends to achieve; which means, freezing and thawing
doesn't really make any difference to this task!

So, to fix freezing failures, we just ask the freezer to skip freezing this
task, since it is already "frozen enough".

But instead of calling freezer_do_not_count() and freezer_count() as it is,
we use only the relevant parts of those functions, because restrictions
such as 'the task should be a userspace one' etc., might not be relevant in
this scenario.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 57a6924..1f7fff4 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -380,12 +380,16 @@ static inline void unlock_system_sleep(void) {}
 
 static inline void lock_system_sleep(void)
 {
+	/* simplified freezer_do_not_count() */
+	current->flags |= PF_FREEZER_SKIP;
 	mutex_lock(&pm_mutex);
 }
 
 static inline void unlock_system_sleep(void)
 {
 	mutex_unlock(&pm_mutex);
+	/* simplified freezer_count() */
+	current->flags &= ~PF_FREEZER_SKIP;
 }
 #endif
 
-- 
cgit v0.10.2


From d74e278aaf3b0fe4b02af67055aa71babcc0cebe Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 21 Nov 2011 23:33:28 +0100
Subject: PM / Sleep: Remove unnecessary label and jumps to it form PM core
 code

The "End" label in device_prepare() in drivers/base/power/main.c is
not necessary and the jumps to it have no real effect, so remove them
all.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index c3d2dfc..1172aea 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1033,22 +1033,16 @@ static int device_prepare(struct device *dev, pm_message_t state)
 		if (dev->pm_domain->ops.prepare)
 			error = dev->pm_domain->ops.prepare(dev);
 		suspend_report_result(dev->pm_domain->ops.prepare, error);
-		if (error)
-			goto End;
 	} else if (dev->type && dev->type->pm) {
 		pm_dev_dbg(dev, state, "preparing type ");
 		if (dev->type->pm->prepare)
 			error = dev->type->pm->prepare(dev);
 		suspend_report_result(dev->type->pm->prepare, error);
-		if (error)
-			goto End;
 	} else if (dev->class && dev->class->pm) {
 		pm_dev_dbg(dev, state, "preparing class ");
 		if (dev->class->pm->prepare)
 			error = dev->class->pm->prepare(dev);
 		suspend_report_result(dev->class->pm->prepare, error);
-		if (error)
-			goto End;
 	} else if (dev->bus && dev->bus->pm) {
 		pm_dev_dbg(dev, state, "preparing ");
 		if (dev->bus->pm->prepare)
@@ -1056,7 +1050,6 @@ static int device_prepare(struct device *dev, pm_message_t state)
 		suspend_report_result(dev->bus->pm->prepare, error);
 	}
 
- End:
 	device_unlock(dev);
 
 	return error;
-- 
cgit v0.10.2


From 64e94aafb6a5c4f419e9b8f93950914b5ac162a9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 21 Nov 2011 23:33:55 +0100
Subject: PM / Sleep: Simplify device_suspend_noirq()

Remove a few if () and return statements in device_suspend_noirq()
that aren't really necessary.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reviewed-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 1172aea..406f82c 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -763,31 +763,23 @@ static pm_message_t resume_event(pm_message_t sleep_state)
  */
 static int device_suspend_noirq(struct device *dev, pm_message_t state)
 {
-	int error;
+	int error = 0;
 
 	if (dev->pm_domain) {
 		pm_dev_dbg(dev, state, "LATE power domain ");
 		error = pm_noirq_op(dev, &dev->pm_domain->ops, state);
-		if (error)
-			return error;
 	} else if (dev->type && dev->type->pm) {
 		pm_dev_dbg(dev, state, "LATE type ");
 		error = pm_noirq_op(dev, dev->type->pm, state);
-		if (error)
-			return error;
 	} else if (dev->class && dev->class->pm) {
 		pm_dev_dbg(dev, state, "LATE class ");
 		error = pm_noirq_op(dev, dev->class->pm, state);
-		if (error)
-			return error;
 	} else if (dev->bus && dev->bus->pm) {
 		pm_dev_dbg(dev, state, "LATE ");
 		error = pm_noirq_op(dev, dev->bus->pm, state);
-		if (error)
-			return error;
 	}
 
-	return 0;
+	return error;
 }
 
 /**
-- 
cgit v0.10.2


From 953a206393b1533ceb0e7d725cc5a8c8d7ed97dd Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Tue, 22 Nov 2011 23:20:31 +0100
Subject: PM / Hibernate: Refactor and simplify hibernation_snapshot() code

The goto statements in hibernation_snapshot() are a bit complex.
Refactor the code to remove some of them, thereby simplifying the
implementation.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a6b0503..ebf62c3 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -333,7 +333,7 @@ static int create_image(int platform_mode)
  */
 int hibernation_snapshot(int platform_mode)
 {
-	pm_message_t msg = PMSG_RECOVER;
+	pm_message_t msg;
 	int error;
 
 	error = platform_begin(platform_mode);
@@ -362,26 +362,26 @@ int hibernation_snapshot(int platform_mode)
 
 	error = dpm_prepare(PMSG_FREEZE);
 	if (error) {
-		dpm_complete(msg);
+		dpm_complete(PMSG_RECOVER);
 		goto Cleanup;
 	}
 
 	suspend_console();
 	pm_restrict_gfp_mask();
+
 	error = dpm_suspend(PMSG_FREEZE);
-	if (error)
-		goto Recover_platform;
 
-	if (hibernation_test(TEST_DEVICES))
-		goto Recover_platform;
+	if (error || hibernation_test(TEST_DEVICES))
+		platform_recover(platform_mode);
+	else
+		error = create_image(platform_mode);
 
-	error = create_image(platform_mode);
 	/*
-	 * Control returns here (1) after the image has been created or the
+	 * In the case that we call create_image() above, the control
+	 * returns here (1) after the image has been created or the
 	 * image creation has failed and (2) after a successful restore.
 	 */
 
- Resume_devices:
 	/* We may need to release the preallocated image pages here. */
 	if (error || !in_suspend)
 		swsusp_free();
@@ -399,10 +399,6 @@ int hibernation_snapshot(int platform_mode)
 	platform_end(platform_mode);
 	return error;
 
- Recover_platform:
-	platform_recover(platform_mode);
-	goto Resume_devices;
-
  Cleanup:
 	swsusp_free();
 	goto Close;
-- 
cgit v0.10.2


From 62c9ea6b120688d800b4d892eaf737c20a73e86b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 25 Nov 2011 00:44:55 +0100
Subject: Freezer: fix more fallout from the thaw_process rename

Commit 944e192db53c "freezer: rename thaw_process() to __thaw_task()
and simplify the implementation" did not create a !CONFIG_FREEZER version
of __thaw_task().

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 09570ac..c1ee283 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -149,6 +149,7 @@ static inline int freezer_should_skip(struct task_struct *p)
 #else /* !CONFIG_FREEZER */
 static inline bool frozen(struct task_struct *p) { return false; }
 static inline bool freezing(struct task_struct *p) { return false; }
+static inline void __thaw_task(struct task_struct *t) {}
 
 static inline bool __refrigerator(bool check_kthr_stop) { return false; }
 static inline int freeze_processes(void) { return -ENOSYS; }
-- 
cgit v0.10.2


From 0118521cc7acb3ccbc1a01d6144ac32be9d56a4c Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 1 Dec 2011 22:32:43 +0100
Subject: PM / Hibernate: Enable usermodehelpers in software_resume() error
 path

In the software_resume() function defined in kernel/power/hibernate.c,
if the call to create_basic_memory_bitmaps() fails, the usermodehelpers
are not enabled (which had been disabled in the previous step). Fix it.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index ebf62c3..1fcf9de 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -807,8 +807,10 @@ static int software_resume(void)
 		goto close_finish;
 
 	error = create_basic_memory_bitmaps();
-	if (error)
+	if (error) {
+		usermodehelper_enable();
 		goto close_finish;
+	}
 
 	pr_debug("PM: Preparing processes for restore.\n");
 	error = prepare_processes();
-- 
cgit v0.10.2


From 97819a26224f019e73d88bb2fd4eb5a614860461 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 1 Dec 2011 22:33:10 +0100
Subject: PM / Hibernate: Thaw processes in SNAPSHOT_CREATE_IMAGE ioctl test
 path

Commit 2aede851ddf08666f68ffc17be446420e9d2a056 (PM / Hibernate: Freeze
kernel threads after preallocating memory) moved the freezing of kernel
threads to hibernation_snapshot() function.

So now, if the call to hibernation_snapshot() returns early due to a
successful hibernation test, the caller has to thaw processes to ensure
that the system gets back to its original state.

But in SNAPSHOT_CREATE_IMAGE hibernation ioctl, the caller does not thaw
processes in case hibernation_snapshot() returned due to a successful
freezer test. Fix this issue. But note we still send the value of 'in_suspend'
(which is now 0) to userspace, because we are not in an error path per-se,
and moreover, the value of in_suspend correctly depicts the situation here.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 1fcf9de..c10cb0f 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -55,7 +55,7 @@ enum {
 
 static int hibernation_mode = HIBERNATION_SHUTDOWN;
 
-static bool freezer_test_done;
+bool freezer_test_done;
 
 static const struct platform_hibernation_ops *hibernation_ops;
 
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 23a2db1..0c4defe 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -50,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info)
 #define SPARE_PAGES	((1024 * 1024) >> PAGE_SHIFT)
 
 /* kernel/power/hibernate.c */
+extern bool freezer_test_done;
+
 extern int hibernation_snapshot(int platform_mode);
 extern int hibernation_restore(int platform_mode);
 extern int hibernation_platform_enter(void);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6d8f535..e2aff0f 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -283,10 +283,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		}
 		pm_restore_gfp_mask();
 		error = hibernation_snapshot(data->platform_support);
-		if (!error)
+		if (!error) {
 			error = put_user(in_suspend, (int __user *)arg);
-		if (!error)
-			data->ready = 1;
+			if (!error && !freezer_test_done)
+				data->ready = 1;
+			if (freezer_test_done) {
+				freezer_test_done = false;
+				thaw_processes();
+			}
+		}
 		break;
 
 	case SNAPSHOT_ATOMIC_RESTORE:
-- 
cgit v0.10.2


From 48580ab8729865c81e148d59159fbe2aa7865511 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 1 Dec 2011 22:33:20 +0100
Subject: PM / Hibernate: Remove deprecated hibernation test modes

The hibernation test modes 'test' and 'testproc' are deprecated, because
the 'pm_test' framework offers much more fine-grained control for debugging
suspend and hibernation related problems.

So, remove the deprecated 'test' and 'testproc' hibernation test modes.

Suggested-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index c10cb0f..5314a94 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -43,8 +43,6 @@ int in_suspend __nosavedata;
 enum {
 	HIBERNATION_INVALID,
 	HIBERNATION_PLATFORM,
-	HIBERNATION_TEST,
-	HIBERNATION_TESTPROC,
 	HIBERNATION_SHUTDOWN,
 	HIBERNATION_REBOOT,
 	/* keep last */
@@ -96,15 +94,6 @@ static void hibernation_debug_sleep(void)
 	mdelay(5000);
 }
 
-static int hibernation_testmode(int mode)
-{
-	if (hibernation_mode == mode) {
-		hibernation_debug_sleep();
-		return 1;
-	}
-	return 0;
-}
-
 static int hibernation_test(int level)
 {
 	if (pm_test_level == level) {
@@ -114,7 +103,6 @@ static int hibernation_test(int level)
 	return 0;
 }
 #else /* !CONFIG_PM_DEBUG */
-static int hibernation_testmode(int mode) { return 0; }
 static int hibernation_test(int level) { return 0; }
 #endif /* !CONFIG_PM_DEBUG */
 
@@ -278,8 +266,7 @@ static int create_image(int platform_mode)
 		goto Platform_finish;
 
 	error = disable_nonboot_cpus();
-	if (error || hibernation_test(TEST_CPUS)
-	    || hibernation_testmode(HIBERNATION_TEST))
+	if (error || hibernation_test(TEST_CPUS))
 		goto Enable_cpus;
 
 	local_irq_disable();
@@ -349,8 +336,7 @@ int hibernation_snapshot(int platform_mode)
 	if (error)
 		goto Cleanup;
 
-	if (hibernation_test(TEST_FREEZER) ||
-		hibernation_testmode(HIBERNATION_TESTPROC)) {
+	if (hibernation_test(TEST_FREEZER)) {
 
 		/*
 		 * Indicate to the caller that we are returning due to a
@@ -586,9 +572,6 @@ int hibernation_platform_enter(void)
 static void power_down(void)
 {
 	switch (hibernation_mode) {
-	case HIBERNATION_TEST:
-	case HIBERNATION_TESTPROC:
-		break;
 	case HIBERNATION_REBOOT:
 		kernel_restart(NULL);
 		break;
@@ -853,8 +836,6 @@ static const char * const hibernation_modes[] = {
 	[HIBERNATION_PLATFORM]	= "platform",
 	[HIBERNATION_SHUTDOWN]	= "shutdown",
 	[HIBERNATION_REBOOT]	= "reboot",
-	[HIBERNATION_TEST]	= "test",
-	[HIBERNATION_TESTPROC]	= "testproc",
 };
 
 /*
@@ -863,17 +844,15 @@ static const char * const hibernation_modes[] = {
  * Hibernation can be handled in several ways.  There are a few different ways
  * to put the system into the sleep state: using the platform driver (e.g. ACPI
  * or other hibernation_ops), powering it off or rebooting it (for testing
- * mostly), or using one of the two available test modes.
+ * mostly).
  *
  * The sysfs file /sys/power/disk provides an interface for selecting the
  * hibernation mode to use.  Reading from this file causes the available modes
- * to be printed.  There are 5 modes that can be supported:
+ * to be printed.  There are 3 modes that can be supported:
  *
  *	'platform'
  *	'shutdown'
  *	'reboot'
- *	'test'
- *	'testproc'
  *
  * If a platform hibernation driver is in use, 'platform' will be supported
  * and will be used by default.  Otherwise, 'shutdown' will be used by default.
@@ -897,8 +876,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
 		switch (i) {
 		case HIBERNATION_SHUTDOWN:
 		case HIBERNATION_REBOOT:
-		case HIBERNATION_TEST:
-		case HIBERNATION_TESTPROC:
 			break;
 		case HIBERNATION_PLATFORM:
 			if (hibernation_ops)
@@ -939,8 +916,6 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 		switch (mode) {
 		case HIBERNATION_SHUTDOWN:
 		case HIBERNATION_REBOOT:
-		case HIBERNATION_TEST:
-		case HIBERNATION_TESTPROC:
 			hibernation_mode = mode;
 			break;
 		case HIBERNATION_PLATFORM:
-- 
cgit v0.10.2


From 5a50a7c32d630d6cdb13d69afabb0cc81b2f379c Mon Sep 17 00:00:00 2001
From: Keng-Yu Lin <kengyu@canonical.com>
Date: Fri, 2 Dec 2011 00:04:23 +0100
Subject: ACPI / PM: Do not save/restore NVS on Asus K54C/K54HR

The models do not resume correctly without acpi_sleep=nonvs.

Signed-off-by: Keng-Yu Lin <kengyu@canonical.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 6d9a3ab..0a7ed69 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -476,6 +476,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FW520F"),
 		},
 	},
+	{
+	.callback = init_nvs_nosave,
+	.ident = "Asus K54C",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "K54C"),
+		},
+	},
+	{
+	.callback = init_nvs_nosave,
+	.ident = "Asus K54HR",
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"),
+		},
+	},
 	{},
 };
 #endif /* CONFIG_SUSPEND */
-- 
cgit v0.10.2


From 0c6aebe31861c470c8cfbfdfdfd72d1369a6440b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sat, 3 Dec 2011 00:23:43 +0100
Subject: PM / Sleep: Unify diagnostic messages from device suspend/resume

Make pm_op() and pm_noirq_op() use the same helper function for
running callbacks, which will cause them to use the same format of
diagnostic messages.  This also reduces the complexity and size of
the code quite a bit.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 406f82c..b570189 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -164,8 +164,9 @@ static ktime_t initcall_debug_start(struct device *dev)
 	ktime_t calltime = ktime_set(0, 0);
 
 	if (initcall_debug) {
-		pr_info("calling  %s+ @ %i\n",
-				dev_name(dev), task_pid_nr(current));
+		pr_info("calling  %s+ @ %i, parent: %s\n",
+			dev_name(dev), task_pid_nr(current),
+			dev->parent ? dev_name(dev->parent) : "none");
 		calltime = ktime_get();
 	}
 
@@ -210,6 +211,24 @@ static void dpm_wait_for_children(struct device *dev, bool async)
        device_for_each_child(dev, &async, dpm_wait_fn);
 }
 
+static int dpm_run_callback(struct device *dev, int (*cb)(struct device *))
+{
+	ktime_t calltime;
+	int error;
+
+	if (!cb)
+		return 0;
+
+	calltime = initcall_debug_start(dev);
+
+	error = cb(dev);
+	suspend_report_result(cb, error);
+
+	initcall_debug_report(dev, calltime, error);
+
+	return error;
+}
+
 /**
  * pm_op - Execute the PM operation appropriate for given PM event.
  * @dev: Device to handle.
@@ -221,59 +240,36 @@ static int pm_op(struct device *dev,
 		 pm_message_t state)
 {
 	int error = 0;
-	ktime_t calltime;
-
-	calltime = initcall_debug_start(dev);
 
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
 	case PM_EVENT_SUSPEND:
-		if (ops->suspend) {
-			error = ops->suspend(dev);
-			suspend_report_result(ops->suspend, error);
-		}
+		error = dpm_run_callback(dev, ops->suspend);
 		break;
 	case PM_EVENT_RESUME:
-		if (ops->resume) {
-			error = ops->resume(dev);
-			suspend_report_result(ops->resume, error);
-		}
+		error = dpm_run_callback(dev, ops->resume);
 		break;
 #endif /* CONFIG_SUSPEND */
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
 	case PM_EVENT_QUIESCE:
-		if (ops->freeze) {
-			error = ops->freeze(dev);
-			suspend_report_result(ops->freeze, error);
-		}
+		error = dpm_run_callback(dev, ops->freeze);
 		break;
 	case PM_EVENT_HIBERNATE:
-		if (ops->poweroff) {
-			error = ops->poweroff(dev);
-			suspend_report_result(ops->poweroff, error);
-		}
+		error = dpm_run_callback(dev, ops->poweroff);
 		break;
 	case PM_EVENT_THAW:
 	case PM_EVENT_RECOVER:
-		if (ops->thaw) {
-			error = ops->thaw(dev);
-			suspend_report_result(ops->thaw, error);
-		}
+		error = dpm_run_callback(dev, ops->thaw);
 		break;
 	case PM_EVENT_RESTORE:
-		if (ops->restore) {
-			error = ops->restore(dev);
-			suspend_report_result(ops->restore, error);
-		}
+		error = dpm_run_callback(dev, ops->restore);
 		break;
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
 	default:
 		error = -EINVAL;
 	}
 
-	initcall_debug_report(dev, calltime, error);
-
 	return error;
 }
 
@@ -291,70 +287,36 @@ static int pm_noirq_op(struct device *dev,
 			pm_message_t state)
 {
 	int error = 0;
-	ktime_t calltime = ktime_set(0, 0), delta, rettime;
-
-	if (initcall_debug) {
-		pr_info("calling  %s+ @ %i, parent: %s\n",
-				dev_name(dev), task_pid_nr(current),
-				dev->parent ? dev_name(dev->parent) : "none");
-		calltime = ktime_get();
-	}
 
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
 	case PM_EVENT_SUSPEND:
-		if (ops->suspend_noirq) {
-			error = ops->suspend_noirq(dev);
-			suspend_report_result(ops->suspend_noirq, error);
-		}
+		error = dpm_run_callback(dev, ops->suspend_noirq);
 		break;
 	case PM_EVENT_RESUME:
-		if (ops->resume_noirq) {
-			error = ops->resume_noirq(dev);
-			suspend_report_result(ops->resume_noirq, error);
-		}
+		error = dpm_run_callback(dev, ops->resume_noirq);
 		break;
 #endif /* CONFIG_SUSPEND */
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
 	case PM_EVENT_QUIESCE:
-		if (ops->freeze_noirq) {
-			error = ops->freeze_noirq(dev);
-			suspend_report_result(ops->freeze_noirq, error);
-		}
+		error = dpm_run_callback(dev, ops->freeze_noirq);
 		break;
 	case PM_EVENT_HIBERNATE:
-		if (ops->poweroff_noirq) {
-			error = ops->poweroff_noirq(dev);
-			suspend_report_result(ops->poweroff_noirq, error);
-		}
+		error = dpm_run_callback(dev, ops->poweroff_noirq);
 		break;
 	case PM_EVENT_THAW:
 	case PM_EVENT_RECOVER:
-		if (ops->thaw_noirq) {
-			error = ops->thaw_noirq(dev);
-			suspend_report_result(ops->thaw_noirq, error);
-		}
+		error = dpm_run_callback(dev, ops->thaw_noirq);
 		break;
 	case PM_EVENT_RESTORE:
-		if (ops->restore_noirq) {
-			error = ops->restore_noirq(dev);
-			suspend_report_result(ops->restore_noirq, error);
-		}
+		error = dpm_run_callback(dev, ops->restore_noirq);
 		break;
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
 	default:
 		error = -EINVAL;
 	}
 
-	if (initcall_debug) {
-		rettime = ktime_get();
-		delta = ktime_sub(rettime, calltime);
-		printk("initcall %s_i+ returned %d after %Ld usecs\n",
-			dev_name(dev), error,
-			(unsigned long long)ktime_to_ns(delta) >> 10);
-	}
-
 	return error;
 }
 
@@ -486,26 +448,6 @@ void dpm_resume_noirq(pm_message_t state)
 EXPORT_SYMBOL_GPL(dpm_resume_noirq);
 
 /**
- * legacy_resume - Execute a legacy (bus or class) resume callback for device.
- * @dev: Device to resume.
- * @cb: Resume callback to execute.
- */
-static int legacy_resume(struct device *dev, int (*cb)(struct device *dev))
-{
-	int error;
-	ktime_t calltime;
-
-	calltime = initcall_debug_start(dev);
-
-	error = cb(dev);
-	suspend_report_result(cb, error);
-
-	initcall_debug_report(dev, calltime, error);
-
-	return error;
-}
-
-/**
  * device_resume - Execute "resume" callbacks for given device.
  * @dev: Device to handle.
  * @state: PM transition of the system being carried out.
@@ -553,7 +495,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 			goto End;
 		} else if (dev->class->resume) {
 			pm_dev_dbg(dev, state, "legacy class ");
-			error = legacy_resume(dev, dev->class->resume);
+			error = dpm_run_callback(dev, dev->class->resume);
 			goto End;
 		}
 	}
@@ -564,7 +506,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 			error = pm_op(dev, dev->bus->pm, state);
 		} else if (dev->bus->resume) {
 			pm_dev_dbg(dev, state, "legacy ");
-			error = legacy_resume(dev, dev->bus->resume);
+			error = dpm_run_callback(dev, dev->bus->resume);
 		}
 	}
 
-- 
cgit v0.10.2


From d310310cbff18ec385c6ab4d58f33b100192a96a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Thu, 1 Dec 2011 22:44:39 +0100
Subject: Freezer / sunrpc / NFS: don't allow TASK_KILLABLE sleeps to block the
 freezer

Allow the freezer to skip wait_on_bit_killable sleeps in the sunrpc
layer. This should allow suspend and hibernate events to proceed, even
when there are RPC's pending on the wire.

Also, wrap the TASK_KILLABLE sleeps in NFS layer in freezer_do_not_count
and freezer_count calls. This allows the freezer to skip tasks that are
sleeping while looping on EJUKEBOX or NFS4ERR_DELAY sorts of errors.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa..bf3a57b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,6 +38,7 @@
 #include <linux/nfs_xdr.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
+#include <linux/freezer.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
@@ -77,7 +78,7 @@ int nfs_wait_bit_killable(void *word)
 {
 	if (fatal_signal_pending(current))
 		return -ERESTARTSYS;
-	schedule();
+	freezable_schedule();
 	return 0;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d4bc9ed9..91943953 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
 #include <linux/nfs_mount.h>
+#include <linux/freezer.h>
 
 #include "iostat.h"
 #include "internal.h"
@@ -32,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 		res = rpc_call_sync(clnt, msg, flags);
 		if (res != -EJUKEBOX && res != -EKEYEXPIRED)
 			break;
-		schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+		freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
 	} while (!fatal_signal_pending(current));
 	return res;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be2bbac..b28bb19 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -53,6 +53,7 @@
 #include <linux/sunrpc/bc_xprt.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
+#include <linux/freezer.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -241,7 +242,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 		*timeout = NFS4_POLL_RETRY_MIN;
 	if (*timeout > NFS4_POLL_RETRY_MAX)
 		*timeout = NFS4_POLL_RETRY_MAX;
-	schedule_timeout_killable(*timeout);
+	freezable_schedule_timeout_killable(*timeout);
 	if (fatal_signal_pending(current))
 		res = -ERESTARTSYS;
 	*timeout <<= 1;
@@ -3950,7 +3951,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
 static unsigned long
 nfs4_set_lock_task_retry(unsigned long timeout)
 {
-	schedule_timeout_killable(timeout);
+	freezable_schedule_timeout_killable(timeout);
 	timeout <<= 1;
 	if (timeout > NFS4_LOCK_MAXTIMEOUT)
 		return NFS4_LOCK_MAXTIMEOUT;
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f48125d..0c672588 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -41,6 +41,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
 #include <linux/lockd/bind.h>
+#include <linux/freezer.h>
 #include "internal.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
@@ -59,7 +60,7 @@ nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 		res = rpc_call_sync(clnt, msg, flags);
 		if (res != -EKEYEXPIRED)
 			break;
-		schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
+		freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
 		res = -ERESTARTSYS;
 	} while (!fatal_signal_pending(current));
 	return res;
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index c1ee283..30f06c2 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -105,6 +105,29 @@ static inline int freezer_should_skip(struct task_struct *p)
 }
 
 /*
+ * These macros are intended to be used whenever you want allow a task that's
+ * sleeping in TASK_UNINTERRUPTIBLE or TASK_KILLABLE state to be frozen. Note
+ * that neither return any clear indication of whether a freeze event happened
+ * while in this function.
+ */
+
+/* Like schedule(), but should not block the freezer. */
+#define freezable_schedule()						\
+({									\
+	freezer_do_not_count();						\
+	schedule();							\
+	freezer_count();						\
+})
+
+/* Like schedule_timeout_killable(), but should not block the freezer. */
+#define freezable_schedule_timeout_killable(timeout)			\
+({									\
+	freezer_do_not_count();						\
+	schedule_timeout_killable(timeout);				\
+	freezer_count();						\
+})
+
+/*
  * Freezer-friendly wrappers around wait_event_interruptible(),
  * wait_event_killable() and wait_event_interruptible_timeout(), originally
  * defined in <linux/wait.h>
@@ -163,6 +186,11 @@ static inline void freezer_count(void) {}
 static inline int freezer_should_skip(struct task_struct *p) { return 0; }
 static inline void set_freezable(void) {}
 
+#define freezable_schedule()  schedule()
+
+#define freezable_schedule_timeout_killable(timeout)			\
+	schedule_timeout_killable(timeout)
+
 #define wait_event_freezable(wq, condition)				\
 		wait_event_interruptible(wq, condition)
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d12ffa5..5317b93 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -18,6 +18,7 @@
 #include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
+#include <linux/freezer.h>
 
 #include <linux/sunrpc/clnt.h>
 
@@ -231,7 +232,7 @@ static int rpc_wait_bit_killable(void *word)
 {
 	if (fatal_signal_pending(current))
 		return -ERESTARTSYS;
-	schedule();
+	freezable_schedule();
 	return 0;
 }
 
-- 
cgit v0.10.2


From e5b16746f0f2d6883c226af52d90904ce0f7eee8 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Sat, 3 Dec 2011 00:20:30 +0100
Subject: PM / Hibernate: Replace unintuitive 'if' condition in
 kernel/power/user.c with 'else'

In the snapshot_ioctl() function, under SNAPSHOT_FREEZE, the code below
freeze_processes() is a bit unintuitive. Improve it by replacing the
second 'if' condition with an 'else' clause.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/kernel/power/user.c b/kernel/power/user.c
index c202e2e..06ea33d 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -259,7 +259,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		error = freeze_processes();
 		if (error)
 			usermodehelper_enable();
-		if (!error)
+		else
 			data->frozen = 1;
 		break;
 
-- 
cgit v0.10.2


From 467de1fc67d1bd2954eaac7019c564f28fa2b6a5 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2011 23:17:51 +0100
Subject: PM / Freezer: Remove the "userspace only" constraint from
 freezer[_do_not]_count()

At present, the functions freezer_count() and freezer_do_not_count()
impose the restriction that they are effective only for userspace processes.
However, now, these functions have found more utility than originally
intended by the commit which introduced it: ba96a0c8 (freezer:
fix vfork problem). And moreover, even the vfork issue actually does not
need the above restriction in these functions.

So, modify these functions to make them work even for kernel threads, so
that they can be used at other places in the kernel, where the userspace
restriction doesn't apply.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index 30f06c2..7bcfe73 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -67,33 +67,27 @@ static inline bool cgroup_freezing(struct task_struct *task)
  * appropriately in case the child has exited before the freezing of tasks is
  * complete.  However, we don't want kernel threads to be frozen in unexpected
  * places, so we allow them to block freeze_processes() instead or to set
- * PF_NOFREEZE if needed and PF_FREEZER_SKIP is only set for userland vfork
- * parents.  Fortunately, in the ____call_usermodehelper() case the parent won't
- * really block freeze_processes(), since ____call_usermodehelper() (the child)
- * does a little before exec/exit and it can't be frozen before waking up the
- * parent.
+ * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the
+ * parent won't really block freeze_processes(), since ____call_usermodehelper()
+ * (the child) does a little before exec/exit and it can't be frozen before
+ * waking up the parent.
  */
 
-/*
- * If the current task is a user space one, tell the freezer not to count it as
- * freezable.
- */
+
+/* Tell the freezer not to count the current task as freezable. */
 static inline void freezer_do_not_count(void)
 {
-	if (current->mm)
-		current->flags |= PF_FREEZER_SKIP;
+	current->flags |= PF_FREEZER_SKIP;
 }
 
 /*
- * If the current task is a user space one, tell the freezer to count it as
- * freezable again and try to freeze it.
+ * Tell the freezer to count the current task as freezable again and try to
+ * freeze it.
  */
 static inline void freezer_count(void)
 {
-	if (current->mm) {
-		current->flags &= ~PF_FREEZER_SKIP;
-		try_to_freeze();
-	}
+	current->flags &= ~PF_FREEZER_SKIP;
+	try_to_freeze();
 }
 
 /*
-- 
cgit v0.10.2


From 33e638b9070ba5e8812836e20390da6a6af13900 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2011 23:18:12 +0100
Subject: PM / Sleep: Use the freezer_count() functions in
 [un]lock_system_sleep() APIs

Now that freezer_count() and freezer_do_not_count() don't have the restriction
that they are effective only when called by userspace processes, use
them in lock_system_sleep() and unlock_system_sleep() instead of open-coding
their parts.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 1f7fff4..906d62c 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -6,6 +6,7 @@
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/mm.h>
+#include <linux/freezer.h>
 #include <asm/errno.h>
 
 #ifdef CONFIG_VT
@@ -380,16 +381,14 @@ static inline void unlock_system_sleep(void) {}
 
 static inline void lock_system_sleep(void)
 {
-	/* simplified freezer_do_not_count() */
-	current->flags |= PF_FREEZER_SKIP;
+	freezer_do_not_count();
 	mutex_lock(&pm_mutex);
 }
 
 static inline void unlock_system_sleep(void)
 {
 	mutex_unlock(&pm_mutex);
-	/* simplified freezer_count() */
-	current->flags &= ~PF_FREEZER_SKIP;
+	freezer_count();
 }
 #endif
 
-- 
cgit v0.10.2


From 9b6fc5dc879bc90f765db0e95eefcf123d0d06dd Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2011 23:24:38 +0100
Subject: PM / Sleep: Make [un]lock_system_sleep() generic

The [un]lock_system_sleep() APIs were originally introduced to mutually
exclude memory hotplug and hibernation.

Directly using mutex_lock(&pm_mutex) to achieve mutual exclusion with
suspend or hibernation code can lead to freezing failures. However, the
APIs [un]lock_system_sleep() can be safely used to achieve the same,
without causing freezing failures.

So, since it would be beneficial to modify all the existing users of
mutex_lock(&pm_mutex) (in all parts of the kernel), so that they use these
safe APIs intead, make these APIs generic by removing the restriction that
they work only when CONFIG_HIBERNATE_CALLBACKS is set. Moreover, that
restriction didn't buy us anything anyway.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 906d62c..95040cc 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -332,6 +332,8 @@ static inline bool system_entering_hibernation(void) { return false; }
 #define PM_RESTORE_PREPARE	0x0005 /* Going to restore a saved image */
 #define PM_POST_RESTORE		0x0006 /* Restore failed */
 
+extern struct mutex pm_mutex;
+
 #ifdef CONFIG_PM_SLEEP
 void save_processor_state(void);
 void restore_processor_state(void);
@@ -352,6 +354,19 @@ extern bool events_check_enabled;
 extern bool pm_wakeup_pending(void);
 extern bool pm_get_wakeup_count(unsigned int *count);
 extern bool pm_save_wakeup_count(unsigned int count);
+
+static inline void lock_system_sleep(void)
+{
+	freezer_do_not_count();
+	mutex_lock(&pm_mutex);
+}
+
+static inline void unlock_system_sleep(void)
+{
+	mutex_unlock(&pm_mutex);
+	freezer_count();
+}
+
 #else /* !CONFIG_PM_SLEEP */
 
 static inline int register_pm_notifier(struct notifier_block *nb)
@@ -367,30 +382,11 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
 static inline bool pm_wakeup_pending(void) { return false; }
-#endif /* !CONFIG_PM_SLEEP */
-
-extern struct mutex pm_mutex;
 
-#ifndef CONFIG_HIBERNATE_CALLBACKS
 static inline void lock_system_sleep(void) {}
 static inline void unlock_system_sleep(void) {}
 
-#else
-
-/* Let some subsystems like memory hotadd exclude hibernation */
-
-static inline void lock_system_sleep(void)
-{
-	freezer_do_not_count();
-	mutex_lock(&pm_mutex);
-}
-
-static inline void unlock_system_sleep(void)
-{
-	mutex_unlock(&pm_mutex);
-	freezer_count();
-}
-#endif
+#endif /* !CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS
 /*
-- 
cgit v0.10.2


From bcda53faf5814c0c6025a0bd47108adfcbe9f199 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Wed, 7 Dec 2011 22:29:54 +0100
Subject: PM / Sleep: Replace mutex_[un]lock(&pm_mutex) with
 [un]lock_system_sleep()

Using [un]lock_system_sleep() is safer than directly using mutex_[un]lock()
on 'pm_mutex', since the latter could lead to freezing failures. Hence convert
all the present users of mutex_[un]lock(&pm_mutex) to use these safe APIs
instead.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/kernel/kexec.c b/kernel/kexec.c
index dc7bc08..090ee10 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1523,7 +1523,7 @@ int kernel_kexec(void)
 
 #ifdef CONFIG_KEXEC_JUMP
 	if (kexec_image->preserve_context) {
-		mutex_lock(&pm_mutex);
+		lock_system_sleep();
 		pm_prepare_console();
 		error = freeze_processes();
 		if (error) {
@@ -1576,7 +1576,7 @@ int kernel_kexec(void)
 		thaw_processes();
  Restore_console:
 		pm_restore_console();
-		mutex_unlock(&pm_mutex);
+		unlock_system_sleep();
 	}
 #endif
 
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 605149a..6d6d288 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -69,14 +69,14 @@ void hibernation_set_ops(const struct platform_hibernation_ops *ops)
 		WARN_ON(1);
 		return;
 	}
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	hibernation_ops = ops;
 	if (ops)
 		hibernation_mode = HIBERNATION_PLATFORM;
 	else if (hibernation_mode == HIBERNATION_PLATFORM)
 		hibernation_mode = HIBERNATION_SHUTDOWN;
 
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 }
 
 static bool entering_platform_hibernation;
@@ -597,7 +597,7 @@ int hibernate(void)
 {
 	int error;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	/* The snapshot device should not be opened while we're running */
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
 		error = -EBUSY;
@@ -665,7 +665,7 @@ int hibernate(void)
 	pm_restore_console();
 	atomic_inc(&snapshot_device_available);
  Unlock:
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 	return error;
 }
 
@@ -893,7 +893,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
 		if (len == strlen(hibernation_modes[i])
 		    && !strncmp(buf, hibernation_modes[i], len)) {
@@ -919,7 +919,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (!error)
 		pr_debug("PM: Hibernation mode set to '%s'\n",
 			 hibernation_modes[mode]);
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 	return error ? error : n;
 }
 
@@ -946,9 +946,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
 	if (maj != MAJOR(res) || min != MINOR(res))
 		goto out;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	swsusp_resume_device = res;
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 	printk(KERN_INFO "PM: Starting manual resume from disk\n");
 	noresume = 0;
 	software_resume();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 7d36fb3..9824b41e 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -116,7 +116,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	level = TEST_FIRST;
 	for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++)
@@ -126,7 +126,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr,
 			break;
 		}
 
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return error ? error : n;
 }
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index d336b27..4fd51be 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -42,9 +42,9 @@ static const struct platform_suspend_ops *suspend_ops;
  */
 void suspend_set_ops(const struct platform_suspend_ops *ops)
 {
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 	suspend_ops = ops;
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 }
 EXPORT_SYMBOL_GPL(suspend_set_ops);
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 06ea33d..98ade21 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -71,7 +71,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	struct snapshot_data *data;
 	int error;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
 		error = -EBUSY;
@@ -123,7 +123,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	data->platform_support = 0;
 
  Unlock:
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return error;
 }
@@ -132,7 +132,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 {
 	struct snapshot_data *data;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	swsusp_free();
 	free_basic_memory_bitmaps();
@@ -146,7 +146,7 @@ static int snapshot_release(struct inode *inode, struct file *filp)
 			PM_POST_HIBERNATION : PM_POST_RESTORE);
 	atomic_inc(&snapshot_device_available);
 
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return 0;
 }
@@ -158,7 +158,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
 	ssize_t res;
 	loff_t pg_offp = *offp & ~PAGE_MASK;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	data = filp->private_data;
 	if (!data->ready) {
@@ -179,7 +179,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf,
 		*offp += res;
 
  Unlock:
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return res;
 }
@@ -191,7 +191,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
 	ssize_t res;
 	loff_t pg_offp = *offp & ~PAGE_MASK;
 
-	mutex_lock(&pm_mutex);
+	lock_system_sleep();
 
 	data = filp->private_data;
 
@@ -208,7 +208,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf,
 	if (res > 0)
 		*offp += res;
 unlock:
-	mutex_unlock(&pm_mutex);
+	unlock_system_sleep();
 
 	return res;
 }
-- 
cgit v0.10.2


From cba3176e88fa134ece3ae1cf7e35dab9972d7853 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Wed, 7 Dec 2011 22:30:09 +0100
Subject: PM / Sleep: Recommend [un]lock_system_sleep() over using pm_mutex
 directly

Update the documentation to explain the perils of directly using
mutex_[un]lock(&pm_mutex) and recommend the usage of the safe
APIs [un]lock_system_sleep() instead.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
index 3ab9fbd..6ccb68f 100644
--- a/Documentation/power/freezing-of-tasks.txt
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -176,3 +176,28 @@ tasks, since it generally exists anyway.
 A driver must have all firmwares it may need in RAM before suspend() is called.
 If keeping them is not practical, for example due to their size, they must be
 requested early enough using the suspend notifier API described in notifiers.txt.
+
+VI. Are there any precautions to be taken to prevent freezing failures?
+
+Yes, there are.
+
+First of all, grabbing the 'pm_mutex' lock to mutually exclude a piece of code
+from system-wide sleep such as suspend/hibernation is not encouraged.
+If possible, that piece of code must instead hook onto the suspend/hibernation
+notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
+(kernel/cpu.c) for an example.
+
+However, if that is not feasible, and grabbing 'pm_mutex' is deemed necessary,
+it is strongly discouraged to directly call mutex_[un]lock(&pm_mutex) since
+that could lead to freezing failures, because if the suspend/hibernate code
+successfully acquired the 'pm_mutex' lock, and hence that other entity failed
+to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
+state. As a consequence, the freezer would not be able to freeze that task,
+leading to freezing failure.
+
+However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
+since they ask the freezer to skip freezing this task, since it is anyway
+"frozen enough" as it is blocked on 'pm_mutex', which will be released
+only after the entire suspend/hibernation sequence is complete.
+So, to summarize, use [un]lock_system_sleep() instead of directly using
+mutex_[un]lock(&pm_mutex). That would prevent freezing failures.
-- 
cgit v0.10.2


From b298d289c79211508f11cb50749b0d1d54eb244a Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Fri, 9 Dec 2011 23:36:36 +0100
Subject: PM / Sleep: Fix freezer failures due to racy
 usermodehelper_is_disabled()

Commit a144c6a (PM: Print a warning if firmware is requested when tasks
are frozen) introduced usermodehelper_is_disabled() to warn and exit
immediately if firmware is requested when usermodehelpers are disabled.

However, it is racy. Consider the following scenario, currently used in
drivers/base/firmware_class.c:

...
if (usermodehelper_is_disabled())
        goto out;

/* Do actual work */
...

out:
        return err;

Nothing prevents someone from disabling usermodehelpers just after the check
in the 'if' condition, which means that it is quite possible to try doing the
"actual work" with usermodehelpers disabled, leading to undesirable
consequences.

In particular, this race condition in _request_firmware() causes task freezing
failures whenever suspend/hibernation is in progress because, it wrongly waits
to get the firmware/microcode image from userspace when actually the
usermodehelpers are disabled or userspace has been frozen.
Some of the example scenarios that cause freezing failures due to this race
are those that depend on userspace via request_firmware(), such as x86
microcode module initialization and microcode image reload.

Previous discussions about this issue can be found at:
http://thread.gmane.org/gmane.linux.kernel/1198291/focus=1200591

This patch adds proper synchronization to fix this issue.

It is to be noted that this patchset fixes the freezing failures but doesn't
remove the warnings. IOW, it does not attempt to add explicit synchronization
to x86 microcode driver to avoid requesting microcode image at inopportune
moments. Because, the warnings were introduced to highlight such cases, in the
first place. And we need not silence the warnings, since we take care of the
*real* problem (freezing failure) and hence, after that, the warnings are
pretty harmless anyway.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 06ed6b4..d5585da 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -534,6 +534,8 @@ static int _request_firmware(const struct firmware **firmware_p,
 		return 0;
 	}
 
+	read_lock_usermodehelper();
+
 	if (WARN_ON(usermodehelper_is_disabled())) {
 		dev_err(device, "firmware: %s will not be loaded\n", name);
 		retval = -EBUSY;
@@ -572,6 +574,8 @@ static int _request_firmware(const struct firmware **firmware_p,
 	fw_destroy_instance(fw_priv);
 
 out:
+	read_unlock_usermodehelper();
+
 	if (retval) {
 		release_firmware(firmware);
 		*firmware_p = NULL;
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index b16f653..722f477 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -117,5 +117,7 @@ extern void usermodehelper_init(void);
 extern int usermodehelper_disable(void);
 extern void usermodehelper_enable(void);
 extern bool usermodehelper_is_disabled(void);
+extern void read_lock_usermodehelper(void);
+extern void read_unlock_usermodehelper(void);
 
 #endif /* __LINUX_KMOD_H__ */
diff --git a/kernel/kmod.c b/kernel/kmod.c
index a4bea97..81b4a27 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -36,6 +36,7 @@
 #include <linux/resource.h>
 #include <linux/notifier.h>
 #include <linux/suspend.h>
+#include <linux/rwsem.h>
 #include <asm/uaccess.h>
 
 #include <trace/events/module.h>
@@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq;
 static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
 static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
 static DEFINE_SPINLOCK(umh_sysctl_lock);
+static DECLARE_RWSEM(umhelper_sem);
 
 #ifdef CONFIG_MODULES
 
@@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work)
  * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
  * (used for preventing user land processes from being created after the user
  * land has been frozen during a system-wide hibernation or suspend operation).
+ * Should always be manipulated under umhelper_sem acquired for write.
  */
 static int usermodehelper_disabled = 1;
 
@@ -293,6 +296,18 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
  */
 #define RUNNING_HELPERS_TIMEOUT	(5 * HZ)
 
+void read_lock_usermodehelper(void)
+{
+	down_read(&umhelper_sem);
+}
+EXPORT_SYMBOL_GPL(read_lock_usermodehelper);
+
+void read_unlock_usermodehelper(void)
+{
+	up_read(&umhelper_sem);
+}
+EXPORT_SYMBOL_GPL(read_unlock_usermodehelper);
+
 /**
  * usermodehelper_disable - prevent new helpers from being started
  */
@@ -300,8 +315,10 @@ int usermodehelper_disable(void)
 {
 	long retval;
 
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 1;
-	smp_mb();
+	up_write(&umhelper_sem);
+
 	/*
 	 * From now on call_usermodehelper_exec() won't start any new
 	 * helpers, so it is sufficient if running_helpers turns out to
@@ -314,7 +331,9 @@ int usermodehelper_disable(void)
 	if (retval)
 		return 0;
 
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 0;
+	up_write(&umhelper_sem);
 	return -EAGAIN;
 }
 
@@ -323,7 +342,9 @@ int usermodehelper_disable(void)
  */
 void usermodehelper_enable(void)
 {
+	down_write(&umhelper_sem);
 	usermodehelper_disabled = 0;
+	up_write(&umhelper_sem);
 }
 
 /**
-- 
cgit v0.10.2


From cf007e3526a785a95a738d5a8fba44f1f4fe33e0 Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Thu, 8 Dec 2011 23:42:53 +0100
Subject: PM / Hibernate: Remove deprecated hibernation snapshot ioctls

Several snapshot ioctls were marked for removal quite some time ago,
since they were deprecated. Remove them.

Suggested-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 3d84912..9f51fc4 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -85,17 +85,6 @@ Who:	Robin Getz <rgetz@blackfin.uclinux.org> & Matt Mackall <mpm@selenic.com>
 
 ---------------------------
 
-What:	Deprecated snapshot ioctls
-When:	2.6.36
-
-Why:	The ioctls in kernel/power/user.c were marked as deprecated long time
-	ago. Now they notify users about that so that they need to replace
-	their userspace. After some more time, remove them completely.
-
-Who:	Jiri Slaby <jirislaby@gmail.com>
-
----------------------------
-
 What:	The ieee80211_regdom module parameter
 When:	March 2010 / desktop catchup
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 98ade21..78bdb44 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -30,28 +30,6 @@
 
 #include "power.h"
 
-/*
- * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and
- * will be removed in the future.  They are only preserved here for
- * compatibility with existing userland utilities.
- */
-#define SNAPSHOT_SET_SWAP_FILE	_IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int)
-#define SNAPSHOT_PMOPS		_IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int)
-
-#define PMOPS_PREPARE	1
-#define PMOPS_ENTER	2
-#define PMOPS_FINISH	3
-
-/*
- * NOTE: The following ioctl definitions are wrong and have been replaced with
- * correct ones.  They are only preserved here for compatibility with existing
- * userland utilities and will be removed in the future.
- */
-#define SNAPSHOT_ATOMIC_SNAPSHOT	_IOW(SNAPSHOT_IOC_MAGIC, 3, void *)
-#define SNAPSHOT_SET_IMAGE_SIZE		_IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long)
-#define SNAPSHOT_AVAIL_SWAP		_IOR(SNAPSHOT_IOC_MAGIC, 7, void *)
-#define SNAPSHOT_GET_SWAP_PAGE		_IOR(SNAPSHOT_IOC_MAGIC, 8, void *)
-
 
 #define SNAPSHOT_MINOR	231
 
@@ -213,15 +191,6 @@ unlock:
 	return res;
 }
 
-static void snapshot_deprecated_ioctl(unsigned int cmd)
-{
-	if (printk_ratelimit())
-		printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will "
-				"be removed soon, update your suspend-to-disk "
-				"utilities\n",
-				__builtin_return_address(0), cmd);
-}
-
 static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 							unsigned long arg)
 {
@@ -272,8 +241,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		data->frozen = 0;
 		break;
 
-	case SNAPSHOT_ATOMIC_SNAPSHOT:
-		snapshot_deprecated_ioctl(cmd);
 	case SNAPSHOT_CREATE_IMAGE:
 		if (data->mode != O_RDONLY || !data->frozen  || data->ready) {
 			error = -EPERM;
@@ -308,8 +275,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		data->ready = 0;
 		break;
 
-	case SNAPSHOT_SET_IMAGE_SIZE:
-		snapshot_deprecated_ioctl(cmd);
 	case SNAPSHOT_PREF_IMAGE_SIZE:
 		image_size = arg;
 		break;
@@ -324,16 +289,12 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		error = put_user(size, (loff_t __user *)arg);
 		break;
 
-	case SNAPSHOT_AVAIL_SWAP:
-		snapshot_deprecated_ioctl(cmd);
 	case SNAPSHOT_AVAIL_SWAP_SIZE:
 		size = count_swap_pages(data->swap, 1);
 		size <<= PAGE_SHIFT;
 		error = put_user(size, (loff_t __user *)arg);
 		break;
 
-	case SNAPSHOT_GET_SWAP_PAGE:
-		snapshot_deprecated_ioctl(cmd);
 	case SNAPSHOT_ALLOC_SWAP_PAGE:
 		if (data->swap < 0 || data->swap >= MAX_SWAPFILES) {
 			error = -ENODEV;
@@ -356,27 +317,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 		free_all_swap_pages(data->swap);
 		break;
 
-	case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */
-		snapshot_deprecated_ioctl(cmd);
-		if (!swsusp_swap_in_use()) {
-			/*
-			 * User space encodes device types as two-byte values,
-			 * so we need to recode them
-			 */
-			if (old_decode_dev(arg)) {
-				data->swap = swap_type_of(old_decode_dev(arg),
-							0, NULL);
-				if (data->swap < 0)
-					error = -ENODEV;
-			} else {
-				data->swap = -1;
-				error = -EINVAL;
-			}
-		} else {
-			error = -EPERM;
-		}
-		break;
-
 	case SNAPSHOT_S2RAM:
 		if (!data->frozen) {
 			error = -EPERM;
@@ -399,33 +339,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 			error = hibernation_platform_enter();
 		break;
 
-	case SNAPSHOT_PMOPS: /* This ioctl is deprecated */
-		snapshot_deprecated_ioctl(cmd);
-		error = -EINVAL;
-
-		switch (arg) {
-
-		case PMOPS_PREPARE:
-			data->platform_support = 1;
-			error = 0;
-			break;
-
-		case PMOPS_ENTER:
-			if (data->platform_support)
-				error = hibernation_platform_enter();
-			break;
-
-		case PMOPS_FINISH:
-			if (data->platform_support)
-				error = 0;
-			break;
-
-		default:
-			printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg);
-
-		}
-		break;
-
 	case SNAPSHOT_SET_SWAP_AREA:
 		if (swsusp_swap_in_use()) {
 			error = -EPERM;
-- 
cgit v0.10.2


From 8ca6d9bcc8d33c592c0855b4b1481bc723ac7e85 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 15 Dec 2011 20:59:23 +0100
Subject: PM / Sleep: Simplify generic system suspend callbacks

The pm_runtime_suspended() check in __pm_generic_call() doesn't
really help and may cause problems to happen, because in some cases
the system suspend callbacks need to be called even if the given
device has been suspended by runtime PM.  For example, if the device
generally supports remote wakeup and is not enabled to wake up
the system from sleep, it should be prevented from generating wakeup
signals during system suspend and that has to be done by the
suspend callbacks that the pm_runtime_suspended() check prevents from
being executed.

Similarly, it may not be a good idea to unconditionally change
the runtime PM status of the device to 'active' in
__pm_generic_resume(), because the driver may want to leave the
device in the 'suspended' state, depending on what happened to it
before the system suspend and whether or not it is enabled to
wake up the system.

For the above reasons, remove the pm_runtime_suspended()
check from __pm_generic_call() and remove the code changing the
device's runtime PM status from __pm_generic_resume().

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 265a0ee..1b878b955 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -97,16 +97,16 @@ int pm_generic_prepare(struct device *dev)
  * @event: PM transition of the system under way.
  * @bool: Whether or not this is the "noirq" stage.
  *
- * If the device has not been suspended at run time, execute the
- * suspend/freeze/poweroff/thaw callback provided by its driver, if defined, and
- * return its error code.  Otherwise, return zero.
+ * Execute the suspend/freeze/poweroff/thaw callback provided by the driver of
+ * @dev, if defined, and return its error code.    Return 0 if the callback is
+ * not present.
  */
 static int __pm_generic_call(struct device *dev, int event, bool noirq)
 {
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 	int (*callback)(struct device *);
 
-	if (!pm || pm_runtime_suspended(dev))
+	if (!pm)
 		return 0;
 
 	switch (event) {
@@ -217,14 +217,12 @@ EXPORT_SYMBOL_GPL(pm_generic_thaw);
  * @bool: Whether or not this is the "noirq" stage.
  *
  * Execute the resume/resotre callback provided by the @dev's driver, if
- * defined.  If it returns 0, change the device's runtime PM status to 'active'.
- * Return the callback's error code.
+ * defined, and return its error code.  Return 0 if the callback is not present.
  */
 static int __pm_generic_resume(struct device *dev, int event, bool noirq)
 {
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 	int (*callback)(struct device *);
-	int ret;
 
 	if (!pm)
 		return 0;
@@ -241,17 +239,7 @@ static int __pm_generic_resume(struct device *dev, int event, bool noirq)
 		break;
 	}
 
-	if (!callback)
-		return 0;
-
-	ret = callback(dev);
-	if (!ret && !noirq && pm_runtime_enabled(dev)) {
-		pm_runtime_disable(dev);
-		pm_runtime_set_active(dev);
-		pm_runtime_enable(dev);
-	}
-
-	return ret;
+	return callback ? callback(dev) : 0;
 }
 
 /**
-- 
cgit v0.10.2


From 1eac8111e0763853266a171ce11214da3a347a0a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Thu, 15 Dec 2011 20:59:30 +0100
Subject: PM / Sleep: Merge internal functions in generic_ops.c

After the change that removed the code related to runtime PM
from __pm_generic_call() and __pm_generic_resume() these two
functions need not be separate any more, so merge them.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 1b878b955..5a5b154 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -97,7 +97,7 @@ int pm_generic_prepare(struct device *dev)
  * @event: PM transition of the system under way.
  * @bool: Whether or not this is the "noirq" stage.
  *
- * Execute the suspend/freeze/poweroff/thaw callback provided by the driver of
+ * Execute the PM callback corresponding to @event provided by the driver of
  * @dev, if defined, and return its error code.    Return 0 if the callback is
  * not present.
  */
@@ -119,9 +119,15 @@ static int __pm_generic_call(struct device *dev, int event, bool noirq)
 	case PM_EVENT_HIBERNATE:
 		callback = noirq ? pm->poweroff_noirq : pm->poweroff;
 		break;
+	case PM_EVENT_RESUME:
+		callback = noirq ? pm->resume_noirq : pm->resume;
+		break;
 	case PM_EVENT_THAW:
 		callback = noirq ? pm->thaw_noirq : pm->thaw;
 		break;
+	case PM_EVENT_RESTORE:
+		callback = noirq ? pm->restore_noirq : pm->restore;
+		break;
 	default:
 		callback = NULL;
 		break;
@@ -211,44 +217,12 @@ int pm_generic_thaw(struct device *dev)
 EXPORT_SYMBOL_GPL(pm_generic_thaw);
 
 /**
- * __pm_generic_resume - Generic resume/restore callback for subsystems.
- * @dev: Device to handle.
- * @event: PM transition of the system under way.
- * @bool: Whether or not this is the "noirq" stage.
- *
- * Execute the resume/resotre callback provided by the @dev's driver, if
- * defined, and return its error code.  Return 0 if the callback is not present.
- */
-static int __pm_generic_resume(struct device *dev, int event, bool noirq)
-{
-	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
-	int (*callback)(struct device *);
-
-	if (!pm)
-		return 0;
-
-	switch (event) {
-	case PM_EVENT_RESUME:
-		callback = noirq ? pm->resume_noirq : pm->resume;
-		break;
-	case PM_EVENT_RESTORE:
-		callback = noirq ? pm->restore_noirq : pm->restore;
-		break;
-	default:
-		callback = NULL;
-		break;
-	}
-
-	return callback ? callback(dev) : 0;
-}
-
-/**
  * pm_generic_resume_noirq - Generic resume_noirq callback for subsystems.
  * @dev: Device to resume.
  */
 int pm_generic_resume_noirq(struct device *dev)
 {
-	return __pm_generic_resume(dev, PM_EVENT_RESUME, true);
+	return __pm_generic_call(dev, PM_EVENT_RESUME, true);
 }
 EXPORT_SYMBOL_GPL(pm_generic_resume_noirq);
 
@@ -258,7 +232,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume_noirq);
  */
 int pm_generic_resume(struct device *dev)
 {
-	return __pm_generic_resume(dev, PM_EVENT_RESUME, false);
+	return __pm_generic_call(dev, PM_EVENT_RESUME, false);
 }
 EXPORT_SYMBOL_GPL(pm_generic_resume);
 
@@ -268,7 +242,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume);
  */
 int pm_generic_restore_noirq(struct device *dev)
 {
-	return __pm_generic_resume(dev, PM_EVENT_RESTORE, true);
+	return __pm_generic_call(dev, PM_EVENT_RESTORE, true);
 }
 EXPORT_SYMBOL_GPL(pm_generic_restore_noirq);
 
@@ -278,7 +252,7 @@ EXPORT_SYMBOL_GPL(pm_generic_restore_noirq);
  */
 int pm_generic_restore(struct device *dev)
 {
-	return __pm_generic_resume(dev, PM_EVENT_RESTORE, false);
+	return __pm_generic_call(dev, PM_EVENT_RESTORE, false);
 }
 EXPORT_SYMBOL_GPL(pm_generic_restore);
 
-- 
cgit v0.10.2


From 9cf519d1c15fa05a538c2b3963c5f3903daf765a Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 18 Dec 2011 00:34:01 +0100
Subject: PM / Sleep: Make pm_op() and pm_noirq_op() return callback pointers

Make the pm_op() and pm_noirq_op() functions return pointers to
appropriate callbacks instead of executing those callbacks and
returning their results.

This change is required for a subsequent modification that will
execute the corresponding driver callback if the subsystem
callback returned by either pm_op(), or pm_noirq_op() is NULL.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index b570189..b5cef7e 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -32,6 +32,8 @@
 #include "../base.h"
 #include "power.h"
 
+typedef int (*pm_callback_t)(struct device *);
+
 /*
  * The entries in the dpm_list list are in a depth first order, simply
  * because children are guaranteed to be discovered after parents, and
@@ -211,113 +213,70 @@ static void dpm_wait_for_children(struct device *dev, bool async)
        device_for_each_child(dev, &async, dpm_wait_fn);
 }
 
-static int dpm_run_callback(struct device *dev, int (*cb)(struct device *))
-{
-	ktime_t calltime;
-	int error;
-
-	if (!cb)
-		return 0;
-
-	calltime = initcall_debug_start(dev);
-
-	error = cb(dev);
-	suspend_report_result(cb, error);
-
-	initcall_debug_report(dev, calltime, error);
-
-	return error;
-}
-
 /**
- * pm_op - Execute the PM operation appropriate for given PM event.
- * @dev: Device to handle.
+ * pm_op - Return the PM operation appropriate for given PM event.
  * @ops: PM operations to choose from.
  * @state: PM transition of the system being carried out.
  */
-static int pm_op(struct device *dev,
-		 const struct dev_pm_ops *ops,
-		 pm_message_t state)
+static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state)
 {
-	int error = 0;
-
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
 	case PM_EVENT_SUSPEND:
-		error = dpm_run_callback(dev, ops->suspend);
-		break;
+		return ops->suspend;
 	case PM_EVENT_RESUME:
-		error = dpm_run_callback(dev, ops->resume);
-		break;
+		return ops->resume;
 #endif /* CONFIG_SUSPEND */
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
 	case PM_EVENT_QUIESCE:
-		error = dpm_run_callback(dev, ops->freeze);
-		break;
+		return ops->freeze;
 	case PM_EVENT_HIBERNATE:
-		error = dpm_run_callback(dev, ops->poweroff);
-		break;
+		return ops->poweroff;
 	case PM_EVENT_THAW:
 	case PM_EVENT_RECOVER:
-		error = dpm_run_callback(dev, ops->thaw);
+		return ops->thaw;
 		break;
 	case PM_EVENT_RESTORE:
-		error = dpm_run_callback(dev, ops->restore);
-		break;
+		return ops->restore;
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
-	default:
-		error = -EINVAL;
 	}
 
-	return error;
+	return NULL;
 }
 
 /**
- * pm_noirq_op - Execute the PM operation appropriate for given PM event.
- * @dev: Device to handle.
+ * pm_noirq_op - Return the PM operation appropriate for given PM event.
  * @ops: PM operations to choose from.
  * @state: PM transition of the system being carried out.
  *
  * The driver of @dev will not receive interrupts while this function is being
  * executed.
  */
-static int pm_noirq_op(struct device *dev,
-			const struct dev_pm_ops *ops,
-			pm_message_t state)
+static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t state)
 {
-	int error = 0;
-
 	switch (state.event) {
 #ifdef CONFIG_SUSPEND
 	case PM_EVENT_SUSPEND:
-		error = dpm_run_callback(dev, ops->suspend_noirq);
-		break;
+		return ops->suspend_noirq;
 	case PM_EVENT_RESUME:
-		error = dpm_run_callback(dev, ops->resume_noirq);
-		break;
+		return ops->resume_noirq;
 #endif /* CONFIG_SUSPEND */
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 	case PM_EVENT_FREEZE:
 	case PM_EVENT_QUIESCE:
-		error = dpm_run_callback(dev, ops->freeze_noirq);
-		break;
+		return ops->freeze_noirq;
 	case PM_EVENT_HIBERNATE:
-		error = dpm_run_callback(dev, ops->poweroff_noirq);
-		break;
+		return ops->poweroff_noirq;
 	case PM_EVENT_THAW:
 	case PM_EVENT_RECOVER:
-		error = dpm_run_callback(dev, ops->thaw_noirq);
-		break;
+		return ops->thaw_noirq;
 	case PM_EVENT_RESTORE:
-		error = dpm_run_callback(dev, ops->restore_noirq);
-		break;
+		return ops->restore_noirq;
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
-	default:
-		error = -EINVAL;
 	}
 
-	return error;
+	return NULL;
 }
 
 static char *pm_verb(int event)
@@ -375,6 +334,26 @@ static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info)
 		usecs / USEC_PER_MSEC, usecs % USEC_PER_MSEC);
 }
 
+static int dpm_run_callback(pm_callback_t cb, struct device *dev,
+			    pm_message_t state, char *info)
+{
+	ktime_t calltime;
+	int error;
+
+	if (!cb)
+		return 0;
+
+	calltime = initcall_debug_start(dev);
+
+	pm_dev_dbg(dev, state, info);
+	error = cb(dev);
+	suspend_report_result(cb, error);
+
+	initcall_debug_report(dev, calltime, error);
+
+	return error;
+}
+
 /*------------------------- Resume routines -------------------------*/
 
 /**
@@ -387,25 +366,29 @@ static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info)
  */
 static int device_resume_noirq(struct device *dev, pm_message_t state)
 {
+	pm_callback_t callback = NULL;
+	char *info = NULL;
 	int error = 0;
 
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
 	if (dev->pm_domain) {
-		pm_dev_dbg(dev, state, "EARLY power domain ");
-		error = pm_noirq_op(dev, &dev->pm_domain->ops, state);
+		info = "EARLY power domain ";
+		callback = pm_noirq_op(&dev->pm_domain->ops, state);
 	} else if (dev->type && dev->type->pm) {
-		pm_dev_dbg(dev, state, "EARLY type ");
-		error = pm_noirq_op(dev, dev->type->pm, state);
+		info = "EARLY type ";
+		callback = pm_noirq_op(dev->type->pm, state);
 	} else if (dev->class && dev->class->pm) {
-		pm_dev_dbg(dev, state, "EARLY class ");
-		error = pm_noirq_op(dev, dev->class->pm, state);
+		info = "EARLY class ";
+		callback = pm_noirq_op(dev->class->pm, state);
 	} else if (dev->bus && dev->bus->pm) {
-		pm_dev_dbg(dev, state, "EARLY ");
-		error = pm_noirq_op(dev, dev->bus->pm, state);
+		info = "EARLY ";
+		callback = pm_noirq_op(dev->bus->pm, state);
 	}
 
+	error = dpm_run_callback(callback, dev, state, info);
+
 	TRACE_RESUME(error);
 	return error;
 }
@@ -455,6 +438,8 @@ EXPORT_SYMBOL_GPL(dpm_resume_noirq);
  */
 static int device_resume(struct device *dev, pm_message_t state, bool async)
 {
+	pm_callback_t callback = NULL;
+	char *info = NULL;
 	int error = 0;
 	bool put = false;
 
@@ -477,40 +462,41 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	put = true;
 
 	if (dev->pm_domain) {
-		pm_dev_dbg(dev, state, "power domain ");
-		error = pm_op(dev, &dev->pm_domain->ops, state);
+		info = "power domain ";
+		callback = pm_op(&dev->pm_domain->ops, state);
 		goto End;
 	}
 
 	if (dev->type && dev->type->pm) {
-		pm_dev_dbg(dev, state, "type ");
-		error = pm_op(dev, dev->type->pm, state);
+		info = "type ";
+		callback = pm_op(dev->type->pm, state);
 		goto End;
 	}
 
 	if (dev->class) {
 		if (dev->class->pm) {
-			pm_dev_dbg(dev, state, "class ");
-			error = pm_op(dev, dev->class->pm, state);
+			info = "class ";
+			callback = pm_op(dev->class->pm, state);
 			goto End;
 		} else if (dev->class->resume) {
-			pm_dev_dbg(dev, state, "legacy class ");
-			error = dpm_run_callback(dev, dev->class->resume);
+			info = "legacy class ";
+			callback = dev->class->resume;
 			goto End;
 		}
 	}
 
 	if (dev->bus) {
 		if (dev->bus->pm) {
-			pm_dev_dbg(dev, state, "");
-			error = pm_op(dev, dev->bus->pm, state);
+			info = "";
+			callback = pm_op(dev->bus->pm, state);
 		} else if (dev->bus->resume) {
-			pm_dev_dbg(dev, state, "legacy ");
-			error = dpm_run_callback(dev, dev->bus->resume);
+			info = "legacy ";
+			callback = dev->bus->resume;
 		}
 	}
 
  End:
+	error = dpm_run_callback(callback, dev, state, info);
 	dev->power.is_suspended = false;
 
  Unlock:
@@ -705,23 +691,24 @@ static pm_message_t resume_event(pm_message_t sleep_state)
  */
 static int device_suspend_noirq(struct device *dev, pm_message_t state)
 {
-	int error = 0;
+	pm_callback_t callback = NULL;
+	char *info = NULL;
 
 	if (dev->pm_domain) {
-		pm_dev_dbg(dev, state, "LATE power domain ");
-		error = pm_noirq_op(dev, &dev->pm_domain->ops, state);
+		info = "LATE power domain ";
+		callback = pm_noirq_op(&dev->pm_domain->ops, state);
 	} else if (dev->type && dev->type->pm) {
-		pm_dev_dbg(dev, state, "LATE type ");
-		error = pm_noirq_op(dev, dev->type->pm, state);
+		info = "LATE type ";
+		callback = pm_noirq_op(dev->type->pm, state);
 	} else if (dev->class && dev->class->pm) {
-		pm_dev_dbg(dev, state, "LATE class ");
-		error = pm_noirq_op(dev, dev->class->pm, state);
+		info = "LATE class ";
+		callback = pm_noirq_op(dev->class->pm, state);
 	} else if (dev->bus && dev->bus->pm) {
-		pm_dev_dbg(dev, state, "LATE ");
-		error = pm_noirq_op(dev, dev->bus->pm, state);
+		info = "LATE ";
+		callback = pm_noirq_op(dev->bus->pm, state);
 	}
 
-	return error;
+	return dpm_run_callback(callback, dev, state, info);
 }
 
 /**
@@ -798,6 +785,8 @@ static int legacy_suspend(struct device *dev, pm_message_t state,
  */
 static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 {
+	pm_callback_t callback = NULL;
+	char *info = NULL;
 	int error = 0;
 
 	dpm_wait_for_children(dev, async);
@@ -818,22 +807,22 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	device_lock(dev);
 
 	if (dev->pm_domain) {
-		pm_dev_dbg(dev, state, "power domain ");
-		error = pm_op(dev, &dev->pm_domain->ops, state);
-		goto End;
+		info = "power domain ";
+		callback = pm_op(&dev->pm_domain->ops, state);
+		goto Run;
 	}
 
 	if (dev->type && dev->type->pm) {
-		pm_dev_dbg(dev, state, "type ");
-		error = pm_op(dev, dev->type->pm, state);
-		goto End;
+		info = "type ";
+		callback = pm_op(dev->type->pm, state);
+		goto Run;
 	}
 
 	if (dev->class) {
 		if (dev->class->pm) {
-			pm_dev_dbg(dev, state, "class ");
-			error = pm_op(dev, dev->class->pm, state);
-			goto End;
+			info = "class ";
+			callback = pm_op(dev->class->pm, state);
+			goto Run;
 		} else if (dev->class->suspend) {
 			pm_dev_dbg(dev, state, "legacy class ");
 			error = legacy_suspend(dev, state, dev->class->suspend);
@@ -843,14 +832,18 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 
 	if (dev->bus) {
 		if (dev->bus->pm) {
-			pm_dev_dbg(dev, state, "");
-			error = pm_op(dev, dev->bus->pm, state);
+			info = "";
+			callback = pm_op(dev->bus->pm, state);
 		} else if (dev->bus->suspend) {
 			pm_dev_dbg(dev, state, "legacy ");
 			error = legacy_suspend(dev, state, dev->bus->suspend);
+			goto End;
 		}
 	}
 
+ Run:
+	error = dpm_run_callback(callback, dev, state, info);
+
  End:
 	if (!error) {
 		dev->power.is_suspended = true;
-- 
cgit v0.10.2


From 35cd133c6130c1eb52806808abee9d62e6854a27 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 18 Dec 2011 00:34:13 +0100
Subject: PM: Run the driver callback directly if the subsystem one is not
 there

Make the PM core execute driver PM callbacks directly if the
corresponding subsystem callbacks are not present.

There are three reasons for doing that.  First, it reflects the
behavior of drivers/base/dd.c:really_probe() that runs the driver's
.probe() callback directly if the bus type's one is not defined, so
this change will remove one arbitrary difference between the PM core
and the remaining parts of the driver core.  Second, it will allow
some subsystems, whose PM callbacks don't do anything except for
executing driver callbacks, to be simplified quite a bit by removing
those "forward-only" callbacks.  Finally, it will allow us to remove
one level of indirection in the system suspend and resume code paths
where it is not necessary, which is going to lead to less debug noise
with initcall_debug passed in the kernel command line (messages won't
be printed for driverless devices whose subsystems don't provide
PM callbacks among other things).

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 3139fb5..20af7de 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -126,7 +126,9 @@ The core methods to suspend and resume devices reside in struct dev_pm_ops
 pointed to by the ops member of struct dev_pm_domain, or by the pm member of
 struct bus_type, struct device_type and struct class.  They are mostly of
 interest to the people writing infrastructure for platforms and buses, like PCI
-or USB, or device type and device class drivers.
+or USB, or device type and device class drivers.  They also are relevant to the
+writers of device drivers whose subsystems (PM domains, device types, device
+classes and bus types) don't provide all power management methods.
 
 Bus drivers implement these methods as appropriate for the hardware and the
 drivers using it; PCI works differently from USB, and so on.  Not many people
@@ -268,32 +270,35 @@ various phases always run after tasks have been frozen and before they are
 unfrozen.  Furthermore, the *_noirq phases run at a time when IRQ handlers have
 been disabled (except for those marked with the IRQF_NO_SUSPEND flag).
 
-All phases use PM domain, bus, type, or class callbacks (that is, methods
-defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, or dev->class->pm).
-These callbacks are regarded by the PM core as mutually exclusive.  Moreover,
-PM domain callbacks always take precedence over bus, type and class callbacks,
-while type callbacks take precedence over bus and class callbacks, and class
-callbacks take precedence over bus callbacks.  To be precise, the following
-rules are used to determine which callback to execute in the given phase:
+All phases use PM domain, bus, type, class or driver callbacks (that is, methods
+defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, dev->class->pm or
+dev->driver->pm).  These callbacks are regarded by the PM core as mutually
+exclusive.  Moreover, PM domain callbacks always take precedence over all of the
+other callbacks and, for example, type callbacks take precedence over bus, class
+and driver callbacks.  To be precise, the following rules are used to determine
+which callback to execute in the given phase:
 
-    1.	If dev->pm_domain is present, the PM core will attempt to execute the
-	callback included in dev->pm_domain->ops.  If that callback is not
-	present, no action will be carried out for the given device.
+    1.	If dev->pm_domain is present, the PM core will choose the callback
+	included in dev->pm_domain->ops for execution
 
     2.	Otherwise, if both dev->type and dev->type->pm are present, the callback
-	included in dev->type->pm will be executed.
+	included in dev->type->pm will be chosen for execution.
 
     3.	Otherwise, if both dev->class and dev->class->pm are present, the
-	callback included in dev->class->pm will be executed.
+	callback included in dev->class->pm will be chosen for execution.
 
     4.	Otherwise, if both dev->bus and dev->bus->pm are present, the callback
-	included in dev->bus->pm will be executed.
+	included in dev->bus->pm will be chosen for execution.
 
 This allows PM domains and device types to override callbacks provided by bus
 types or device classes if necessary.
 
-These callbacks may in turn invoke device- or driver-specific methods stored in
-dev->driver->pm, but they don't have to.
+The PM domain, type, class and bus callbacks may in turn invoke device- or
+driver-specific methods stored in dev->driver->pm, but they don't have to do
+that.
+
+If the subsystem callback chosen for execution is not present, the PM core will
+execute the corresponding method from dev->driver->pm instead if there is one.
 
 
 Entering System Suspend
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index c2ae8bf..4abe83e 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -57,6 +57,10 @@ the following:
 
   4. Bus type of the device, if both dev->bus and dev->bus->pm are present.
 
+If the subsystem chosen by applying the above rules doesn't provide the relevant
+callback, the PM core will invoke the corresponding driver callback stored in
+dev->driver->pm directly (if present).
+
 The PM core always checks which callback to use in the order given above, so the
 priority order of callbacks from high to low is: PM domain, device type, class
 and bus type.  Moreover, the high-priority one will always take precedence over
@@ -64,86 +68,88 @@ a low-priority one.  The PM domain, bus type, device type and class callbacks
 are referred to as subsystem-level callbacks in what follows.
 
 By default, the callbacks are always invoked in process context with interrupts
-enabled.  However, subsystems can use the pm_runtime_irq_safe() helper function
-to tell the PM core that their ->runtime_suspend(), ->runtime_resume() and
-->runtime_idle() callbacks may be invoked in atomic context with interrupts
-disabled for a given device.  This implies that the callback routines in
-question must not block or sleep, but it also means that the synchronous helper
-functions listed at the end of Section 4 may be used for that device within an
-interrupt handler or generally in an atomic context.
-
-The subsystem-level suspend callback is _entirely_ _responsible_ for handling
-the suspend of the device as appropriate, which may, but need not include
-executing the device driver's own ->runtime_suspend() callback (from the
+enabled.  However, the pm_runtime_irq_safe() helper function can be used to tell
+the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume()
+and ->runtime_idle() callbacks for the given device in atomic context with
+interrupts disabled.  This implies that the callback routines in question must
+not block or sleep, but it also means that the synchronous helper functions
+listed at the end of Section 4 may be used for that device within an interrupt
+handler or generally in an atomic context.
+
+The subsystem-level suspend callback, if present, is _entirely_ _responsible_
+for handling the suspend of the device as appropriate, which may, but need not
+include executing the device driver's own ->runtime_suspend() callback (from the
 PM core's point of view it is not necessary to implement a ->runtime_suspend()
 callback in a device driver as long as the subsystem-level suspend callback
 knows what to do to handle the device).
 
-  * Once the subsystem-level suspend callback has completed successfully
-    for given device, the PM core regards the device as suspended, which need
-    not mean that the device has been put into a low power state.  It is
-    supposed to mean, however, that the device will not process data and will
-    not communicate with the CPU(s) and RAM until the subsystem-level resume
-    callback is executed for it.  The runtime PM status of a device after
-    successful execution of the subsystem-level suspend callback is 'suspended'.
-
-  * If the subsystem-level suspend callback returns -EBUSY or -EAGAIN,
-    the device's runtime PM status is 'active', which means that the device
-    _must_ be fully operational afterwards.
-
-  * If the subsystem-level suspend callback returns an error code different
-    from -EBUSY or -EAGAIN, the PM core regards this as a fatal error and will
-    refuse to run the helper functions described in Section 4 for the device,
-    until the status of it is directly set either to 'active', or to 'suspended'
-    (the PM core provides special helper functions for this purpose).
-
-In particular, if the driver requires remote wake-up capability (i.e. hardware
+  * Once the subsystem-level suspend callback (or the driver suspend callback,
+    if invoked directly) has completed successfully for the given device, the PM
+    core regards the device as suspended, which need not mean that it has been
+    put into a low power state.  It is supposed to mean, however, that the
+    device will not process data and will not communicate with the CPU(s) and
+    RAM until the appropriate resume callback is executed for it.  The runtime
+    PM status of a device after successful execution of the suspend callback is
+    'suspended'.
+
+  * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM
+    status remains 'active', which means that the device _must_ be fully
+    operational afterwards.
+
+  * If the suspend callback returns an error code different from -EBUSY and
+    -EAGAIN, the PM core regards this as a fatal error and will refuse to run
+    the helper functions described in Section 4 for the device until its status
+    is directly set to  either'active', or 'suspended' (the PM core provides
+    special helper functions for this purpose).
+
+In particular, if the driver requires remote wakeup capability (i.e. hardware
 mechanism allowing the device to request a change of its power state, such as
 PCI PME) for proper functioning and device_run_wake() returns 'false' for the
 device, then ->runtime_suspend() should return -EBUSY.  On the other hand, if
-device_run_wake() returns 'true' for the device and the device is put into a low
-power state during the execution of the subsystem-level suspend callback, it is
-expected that remote wake-up will be enabled for the device.  Generally, remote
-wake-up should be enabled for all input devices put into a low power state at
-run time.
-
-The subsystem-level resume callback is _entirely_ _responsible_ for handling the
-resume of the device as appropriate, which may, but need not include executing
-the device driver's own ->runtime_resume() callback (from the PM core's point of
-view it is not necessary to implement a ->runtime_resume() callback in a device
-driver as long as the subsystem-level resume callback knows what to do to handle
-the device).
-
-  * Once the subsystem-level resume callback has completed successfully, the PM
-    core regards the device as fully operational, which means that the device
-    _must_ be able to complete I/O operations as needed.  The runtime PM status
-    of the device is then 'active'.
-
-  * If the subsystem-level resume callback returns an error code, the PM core
-    regards this as a fatal error and will refuse to run the helper functions
-    described in Section 4 for the device, until its status is directly set
-    either to 'active' or to 'suspended' (the PM core provides special helper
-    functions for this purpose).
-
-The subsystem-level idle callback is executed by the PM core whenever the device
-appears to be idle, which is indicated to the PM core by two counters, the
-device's usage counter and the counter of 'active' children of the device.
+device_run_wake() returns 'true' for the device and the device is put into a
+low-power state during the execution of the suspend callback, it is expected
+that remote wakeup will be enabled for the device.  Generally, remote wakeup
+should be enabled for all input devices put into low-power states at run time.
+
+The subsystem-level resume callback, if present, is _entirely_ _responsible_ for
+handling the resume of the device as appropriate, which may, but need not
+include executing the device driver's own ->runtime_resume() callback (from the
+PM core's point of view it is not necessary to implement a ->runtime_resume()
+callback in a device driver as long as the subsystem-level resume callback knows
+what to do to handle the device).
+
+  * Once the subsystem-level resume callback (or the driver resume callback, if
+    invoked directly) has completed successfully, the PM core regards the device
+    as fully operational, which means that the device _must_ be able to complete
+    I/O operations as needed.  The runtime PM status of the device is then
+    'active'.
+
+  * If the resume callback returns an error code, the PM core regards this as a
+    fatal error and will refuse to run the helper functions described in Section
+    4 for the device, until its status is directly set to either 'active', or
+    'suspended' (by means of special helper functions provided by the PM core
+    for this purpose).
+
+The idle callback (a subsystem-level one, if present, or the driver one) is
+executed by the PM core whenever the device appears to be idle, which is
+indicated to the PM core by two counters, the device's usage counter and the
+counter of 'active' children of the device.
 
   * If any of these counters is decreased using a helper function provided by
     the PM core and it turns out to be equal to zero, the other counter is
     checked.  If that counter also is equal to zero, the PM core executes the
-    subsystem-level idle callback with the device as an argument.
+    idle callback with the device as its argument.
 
-The action performed by a subsystem-level idle callback is totally dependent on
-the subsystem in question, but the expected and recommended action is to check
+The action performed by the idle callback is totally dependent on the subsystem
+(or driver) in question, but the expected and recommended action is to check
 if the device can be suspended (i.e. if all of the conditions necessary for
 suspending the device are satisfied) and to queue up a suspend request for the
 device in that case.  The value returned by this callback is ignored by the PM
 core.
 
 The helper functions provided by the PM core, described in Section 4, guarantee
-that the following constraints are met with respect to the bus type's runtime
-PM callbacks:
+that the following constraints are met with respect to runtime PM callbacks for
+one device:
 
 (1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
     ->runtime_suspend() in parallel with ->runtime_resume() or with another
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index b5cef7e..e2cc3d2 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -383,10 +383,15 @@ static int device_resume_noirq(struct device *dev, pm_message_t state)
 		info = "EARLY class ";
 		callback = pm_noirq_op(dev->class->pm, state);
 	} else if (dev->bus && dev->bus->pm) {
-		info = "EARLY ";
+		info = "EARLY bus ";
 		callback = pm_noirq_op(dev->bus->pm, state);
 	}
 
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "EARLY driver ";
+		callback = pm_noirq_op(dev->driver->pm, state);
+	}
+
 	error = dpm_run_callback(callback, dev, state, info);
 
 	TRACE_RESUME(error);
@@ -464,20 +469,20 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	if (dev->pm_domain) {
 		info = "power domain ";
 		callback = pm_op(&dev->pm_domain->ops, state);
-		goto End;
+		goto Driver;
 	}
 
 	if (dev->type && dev->type->pm) {
 		info = "type ";
 		callback = pm_op(dev->type->pm, state);
-		goto End;
+		goto Driver;
 	}
 
 	if (dev->class) {
 		if (dev->class->pm) {
 			info = "class ";
 			callback = pm_op(dev->class->pm, state);
-			goto End;
+			goto Driver;
 		} else if (dev->class->resume) {
 			info = "legacy class ";
 			callback = dev->class->resume;
@@ -487,14 +492,21 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 
 	if (dev->bus) {
 		if (dev->bus->pm) {
-			info = "";
+			info = "bus ";
 			callback = pm_op(dev->bus->pm, state);
 		} else if (dev->bus->resume) {
-			info = "legacy ";
+			info = "legacy bus ";
 			callback = dev->bus->resume;
+			goto End;
 		}
 	}
 
+ Driver:
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "driver ";
+		callback = pm_op(dev->driver->pm, state);
+	}
+
  End:
 	error = dpm_run_callback(callback, dev, state, info);
 	dev->power.is_suspended = false;
@@ -588,24 +600,33 @@ void dpm_resume(pm_message_t state)
  */
 static void device_complete(struct device *dev, pm_message_t state)
 {
+	void (*callback)(struct device *) = NULL;
+	char *info = NULL;
+
 	device_lock(dev);
 
 	if (dev->pm_domain) {
-		pm_dev_dbg(dev, state, "completing power domain ");
-		if (dev->pm_domain->ops.complete)
-			dev->pm_domain->ops.complete(dev);
+		info = "completing power domain ";
+		callback = dev->pm_domain->ops.complete;
 	} else if (dev->type && dev->type->pm) {
-		pm_dev_dbg(dev, state, "completing type ");
-		if (dev->type->pm->complete)
-			dev->type->pm->complete(dev);
+		info = "completing type ";
+		callback = dev->type->pm->complete;
 	} else if (dev->class && dev->class->pm) {
-		pm_dev_dbg(dev, state, "completing class ");
-		if (dev->class->pm->complete)
-			dev->class->pm->complete(dev);
+		info = "completing class ";
+		callback = dev->class->pm->complete;
 	} else if (dev->bus && dev->bus->pm) {
-		pm_dev_dbg(dev, state, "completing ");
-		if (dev->bus->pm->complete)
-			dev->bus->pm->complete(dev);
+		info = "completing bus ";
+		callback = dev->bus->pm->complete;
+	}
+
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "completing driver ";
+		callback = dev->driver->pm->complete;
+	}
+
+	if (callback) {
+		pm_dev_dbg(dev, state, info);
+		callback(dev);
 	}
 
 	device_unlock(dev);
@@ -704,10 +725,15 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state)
 		info = "LATE class ";
 		callback = pm_noirq_op(dev->class->pm, state);
 	} else if (dev->bus && dev->bus->pm) {
-		info = "LATE ";
+		info = "LATE bus ";
 		callback = pm_noirq_op(dev->bus->pm, state);
 	}
 
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "LATE driver ";
+		callback = pm_noirq_op(dev->driver->pm, state);
+	}
+
 	return dpm_run_callback(callback, dev, state, info);
 }
 
@@ -832,16 +858,21 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 
 	if (dev->bus) {
 		if (dev->bus->pm) {
-			info = "";
+			info = "bus ";
 			callback = pm_op(dev->bus->pm, state);
 		} else if (dev->bus->suspend) {
-			pm_dev_dbg(dev, state, "legacy ");
+			pm_dev_dbg(dev, state, "legacy bus ");
 			error = legacy_suspend(dev, state, dev->bus->suspend);
 			goto End;
 		}
 	}
 
  Run:
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "driver ";
+		callback = pm_op(dev->driver->pm, state);
+	}
+
 	error = dpm_run_callback(callback, dev, state, info);
 
  End:
@@ -949,6 +980,8 @@ int dpm_suspend(pm_message_t state)
  */
 static int device_prepare(struct device *dev, pm_message_t state)
 {
+	int (*callback)(struct device *) = NULL;
+	char *info = NULL;
 	int error = 0;
 
 	device_lock(dev);
@@ -956,25 +989,27 @@ static int device_prepare(struct device *dev, pm_message_t state)
 	dev->power.wakeup_path = device_may_wakeup(dev);
 
 	if (dev->pm_domain) {
-		pm_dev_dbg(dev, state, "preparing power domain ");
-		if (dev->pm_domain->ops.prepare)
-			error = dev->pm_domain->ops.prepare(dev);
-		suspend_report_result(dev->pm_domain->ops.prepare, error);
+		info = "preparing power domain ";
+		callback = dev->pm_domain->ops.prepare;
 	} else if (dev->type && dev->type->pm) {
-		pm_dev_dbg(dev, state, "preparing type ");
-		if (dev->type->pm->prepare)
-			error = dev->type->pm->prepare(dev);
-		suspend_report_result(dev->type->pm->prepare, error);
+		info = "preparing type ";
+		callback = dev->type->pm->prepare;
 	} else if (dev->class && dev->class->pm) {
-		pm_dev_dbg(dev, state, "preparing class ");
-		if (dev->class->pm->prepare)
-			error = dev->class->pm->prepare(dev);
-		suspend_report_result(dev->class->pm->prepare, error);
+		info = "preparing class ";
+		callback = dev->class->pm->prepare;
 	} else if (dev->bus && dev->bus->pm) {
-		pm_dev_dbg(dev, state, "preparing ");
-		if (dev->bus->pm->prepare)
-			error = dev->bus->pm->prepare(dev);
-		suspend_report_result(dev->bus->pm->prepare, error);
+		info = "preparing bus ";
+		callback = dev->bus->pm->prepare;
+	}
+
+	if (!callback && dev->driver && dev->driver->pm) {
+		info = "preparing driver ";
+		callback = dev->driver->pm->prepare;
+	}
+
+	if (callback) {
+		error = callback(dev);
+		suspend_report_result(callback, error);
 	}
 
 	device_unlock(dev);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 8c78443..c56efd7 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -250,6 +250,9 @@ static int rpm_idle(struct device *dev, int rpmflags)
 	else
 		callback = NULL;
 
+	if (!callback && dev->driver && dev->driver->pm)
+		callback = dev->driver->pm->runtime_idle;
+
 	if (callback)
 		__rpm_callback(callback, dev);
 
@@ -413,6 +416,9 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 	else
 		callback = NULL;
 
+	if (!callback && dev->driver && dev->driver->pm)
+		callback = dev->driver->pm->runtime_suspend;
+
 	retval = rpm_callback(callback, dev);
 	if (retval) {
 		__update_runtime_status(dev, RPM_ACTIVE);
@@ -633,6 +639,9 @@ static int rpm_resume(struct device *dev, int rpmflags)
 	else
 		callback = NULL;
 
+	if (!callback && dev->driver && dev->driver->pm)
+		callback = dev->driver->pm->runtime_resume;
+
 	retval = rpm_callback(callback, dev);
 	if (retval) {
 		__update_runtime_status(dev, RPM_SUSPENDED);
-- 
cgit v0.10.2


From 9b39e73d0c2b265a7f8748b0e9a9f09be84079a8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 18 Dec 2011 00:34:24 +0100
Subject: PM / Sleep: Remove forward-only callbacks from platform bus type

The forward-only PM callbacks provided by the platform bus type are
not necessary any more, because the PM core executes driver callbacks
when the corresponding subsystem callbacks are not present, so drop
them.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 7a24895..7d912d5 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -700,25 +700,6 @@ static int platform_legacy_resume(struct device *dev)
 	return ret;
 }
 
-int platform_pm_prepare(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (drv && drv->pm && drv->pm->prepare)
-		ret = drv->pm->prepare(dev);
-
-	return ret;
-}
-
-void platform_pm_complete(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-
-	if (drv && drv->pm && drv->pm->complete)
-		drv->pm->complete(dev);
-}
-
 #endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_SUSPEND
@@ -741,22 +722,6 @@ int platform_pm_suspend(struct device *dev)
 	return ret;
 }
 
-int platform_pm_suspend_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->suspend_noirq)
-			ret = drv->pm->suspend_noirq(dev);
-	}
-
-	return ret;
-}
-
 int platform_pm_resume(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -775,22 +740,6 @@ int platform_pm_resume(struct device *dev)
 	return ret;
 }
 
-int platform_pm_resume_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->resume_noirq)
-			ret = drv->pm->resume_noirq(dev);
-	}
-
-	return ret;
-}
-
 #endif /* CONFIG_SUSPEND */
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
@@ -813,22 +762,6 @@ int platform_pm_freeze(struct device *dev)
 	return ret;
 }
 
-int platform_pm_freeze_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->freeze_noirq)
-			ret = drv->pm->freeze_noirq(dev);
-	}
-
-	return ret;
-}
-
 int platform_pm_thaw(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -847,22 +780,6 @@ int platform_pm_thaw(struct device *dev)
 	return ret;
 }
 
-int platform_pm_thaw_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->thaw_noirq)
-			ret = drv->pm->thaw_noirq(dev);
-	}
-
-	return ret;
-}
-
 int platform_pm_poweroff(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -881,22 +798,6 @@ int platform_pm_poweroff(struct device *dev)
 	return ret;
 }
 
-int platform_pm_poweroff_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->poweroff_noirq)
-			ret = drv->pm->poweroff_noirq(dev);
-	}
-
-	return ret;
-}
-
 int platform_pm_restore(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -915,22 +816,6 @@ int platform_pm_restore(struct device *dev)
 	return ret;
 }
 
-int platform_pm_restore_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->restore_noirq)
-			ret = drv->pm->restore_noirq(dev);
-	}
-
-	return ret;
-}
-
 #endif /* CONFIG_HIBERNATE_CALLBACKS */
 
 static const struct dev_pm_ops platform_dev_pm_ops = {
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 2a23f7d..b5267c9 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -264,62 +264,34 @@ static inline char *early_platform_driver_setup_func(void)		\
 }
 #endif /* MODULE */
 
-#ifdef CONFIG_PM_SLEEP
-extern int platform_pm_prepare(struct device *dev);
-extern void platform_pm_complete(struct device *dev);
-#else
-#define platform_pm_prepare	NULL
-#define platform_pm_complete	NULL
-#endif
-
 #ifdef CONFIG_SUSPEND
 extern int platform_pm_suspend(struct device *dev);
-extern int platform_pm_suspend_noirq(struct device *dev);
 extern int platform_pm_resume(struct device *dev);
-extern int platform_pm_resume_noirq(struct device *dev);
 #else
 #define platform_pm_suspend		NULL
 #define platform_pm_resume		NULL
-#define platform_pm_suspend_noirq	NULL
-#define platform_pm_resume_noirq	NULL
 #endif
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 extern int platform_pm_freeze(struct device *dev);
-extern int platform_pm_freeze_noirq(struct device *dev);
 extern int platform_pm_thaw(struct device *dev);
-extern int platform_pm_thaw_noirq(struct device *dev);
 extern int platform_pm_poweroff(struct device *dev);
-extern int platform_pm_poweroff_noirq(struct device *dev);
 extern int platform_pm_restore(struct device *dev);
-extern int platform_pm_restore_noirq(struct device *dev);
 #else
 #define platform_pm_freeze		NULL
 #define platform_pm_thaw		NULL
 #define platform_pm_poweroff		NULL
 #define platform_pm_restore		NULL
-#define platform_pm_freeze_noirq	NULL
-#define platform_pm_thaw_noirq		NULL
-#define platform_pm_poweroff_noirq	NULL
-#define platform_pm_restore_noirq	NULL
 #endif
 
 #ifdef CONFIG_PM_SLEEP
 #define USE_PLATFORM_PM_SLEEP_OPS \
-	.prepare = platform_pm_prepare, \
-	.complete = platform_pm_complete, \
 	.suspend = platform_pm_suspend, \
 	.resume = platform_pm_resume, \
 	.freeze = platform_pm_freeze, \
 	.thaw = platform_pm_thaw, \
 	.poweroff = platform_pm_poweroff, \
-	.restore = platform_pm_restore, \
-	.suspend_noirq = platform_pm_suspend_noirq, \
-	.resume_noirq = platform_pm_resume_noirq, \
-	.freeze_noirq = platform_pm_freeze_noirq, \
-	.thaw_noirq = platform_pm_thaw_noirq, \
-	.poweroff_noirq = platform_pm_poweroff_noirq, \
-	.restore_noirq = platform_pm_restore_noirq,
+	.restore = platform_pm_restore,
 #else
 #define USE_PLATFORM_PM_SLEEP_OPS
 #endif
-- 
cgit v0.10.2


From 8114ab763b2d297c8af49bf380a093d76e929692 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 18 Dec 2011 00:34:31 +0100
Subject: PM / Sleep: Remove forward-only callbacks from AMBA bus type

The forward-only PM callbacks provided by the AMBA bus type are not
necessary any more, because the PM core executes driver callbacks
when the corresponding subsystem callbacks are not present, so drop
them.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index bd230e8..0304b3f 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -109,31 +109,7 @@ static int amba_legacy_resume(struct device *dev)
 	return ret;
 }
 
-static int amba_pm_prepare(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (drv && drv->pm && drv->pm->prepare)
-		ret = drv->pm->prepare(dev);
-
-	return ret;
-}
-
-static void amba_pm_complete(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-
-	if (drv && drv->pm && drv->pm->complete)
-		drv->pm->complete(dev);
-}
-
-#else /* !CONFIG_PM_SLEEP */
-
-#define amba_pm_prepare		NULL
-#define amba_pm_complete		NULL
-
-#endif /* !CONFIG_PM_SLEEP */
+#endif /* CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_SUSPEND
 
@@ -155,22 +131,6 @@ static int amba_pm_suspend(struct device *dev)
 	return ret;
 }
 
-static int amba_pm_suspend_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->suspend_noirq)
-			ret = drv->pm->suspend_noirq(dev);
-	}
-
-	return ret;
-}
-
 static int amba_pm_resume(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -189,28 +149,10 @@ static int amba_pm_resume(struct device *dev)
 	return ret;
 }
 
-static int amba_pm_resume_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->resume_noirq)
-			ret = drv->pm->resume_noirq(dev);
-	}
-
-	return ret;
-}
-
 #else /* !CONFIG_SUSPEND */
 
 #define amba_pm_suspend		NULL
 #define amba_pm_resume		NULL
-#define amba_pm_suspend_noirq	NULL
-#define amba_pm_resume_noirq	NULL
 
 #endif /* !CONFIG_SUSPEND */
 
@@ -234,22 +176,6 @@ static int amba_pm_freeze(struct device *dev)
 	return ret;
 }
 
-static int amba_pm_freeze_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->freeze_noirq)
-			ret = drv->pm->freeze_noirq(dev);
-	}
-
-	return ret;
-}
-
 static int amba_pm_thaw(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -268,22 +194,6 @@ static int amba_pm_thaw(struct device *dev)
 	return ret;
 }
 
-static int amba_pm_thaw_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->thaw_noirq)
-			ret = drv->pm->thaw_noirq(dev);
-	}
-
-	return ret;
-}
-
 static int amba_pm_poweroff(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -302,22 +212,6 @@ static int amba_pm_poweroff(struct device *dev)
 	return ret;
 }
 
-static int amba_pm_poweroff_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->poweroff_noirq)
-			ret = drv->pm->poweroff_noirq(dev);
-	}
-
-	return ret;
-}
-
 static int amba_pm_restore(struct device *dev)
 {
 	struct device_driver *drv = dev->driver;
@@ -336,32 +230,12 @@ static int amba_pm_restore(struct device *dev)
 	return ret;
 }
 
-static int amba_pm_restore_noirq(struct device *dev)
-{
-	struct device_driver *drv = dev->driver;
-	int ret = 0;
-
-	if (!drv)
-		return 0;
-
-	if (drv->pm) {
-		if (drv->pm->restore_noirq)
-			ret = drv->pm->restore_noirq(dev);
-	}
-
-	return ret;
-}
-
 #else /* !CONFIG_HIBERNATE_CALLBACKS */
 
 #define amba_pm_freeze		NULL
 #define amba_pm_thaw		NULL
 #define amba_pm_poweroff		NULL
 #define amba_pm_restore		NULL
-#define amba_pm_freeze_noirq	NULL
-#define amba_pm_thaw_noirq		NULL
-#define amba_pm_poweroff_noirq	NULL
-#define amba_pm_restore_noirq	NULL
 
 #endif /* !CONFIG_HIBERNATE_CALLBACKS */
 
@@ -402,20 +276,12 @@ static int amba_pm_runtime_resume(struct device *dev)
 #ifdef CONFIG_PM
 
 static const struct dev_pm_ops amba_pm = {
-	.prepare	= amba_pm_prepare,
-	.complete	= amba_pm_complete,
 	.suspend	= amba_pm_suspend,
 	.resume		= amba_pm_resume,
 	.freeze		= amba_pm_freeze,
 	.thaw		= amba_pm_thaw,
 	.poweroff	= amba_pm_poweroff,
 	.restore	= amba_pm_restore,
-	.suspend_noirq	= amba_pm_suspend_noirq,
-	.resume_noirq	= amba_pm_resume_noirq,
-	.freeze_noirq	= amba_pm_freeze_noirq,
-	.thaw_noirq	= amba_pm_thaw_noirq,
-	.poweroff_noirq	= amba_pm_poweroff_noirq,
-	.restore_noirq	= amba_pm_restore_noirq,
 	SET_RUNTIME_PM_OPS(
 		amba_pm_runtime_suspend,
 		amba_pm_runtime_resume,
-- 
cgit v0.10.2


From 90363ddf0a1a4dccfbb8d0c10b8f488bc7fa69f8 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 18 Dec 2011 00:34:42 +0100
Subject: PM: Drop generic_subsys_pm_ops

Since the PM core is now going to execute driver callbacks directly
if the corresponding subsystem callbacks are not present,
forward-only subsystem callbacks (i.e. such that only execute the
corresponding driver callbacks) are not necessary any more.  Thus
it is possible to remove generic_subsys_pm_ops, because the only
callback in there that is not forward-only, .runtime_idle, is not
really used by the only user of generic_subsys_pm_ops, which is
vio_bus_type.

However, the generic callback routines themselves cannot be removed
from generic_ops.c, because they are used individually by a number
of subsystems.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>

diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index f65af61..8b08629 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -1406,7 +1406,6 @@ static struct bus_type vio_bus_type = {
 	.match = vio_bus_match,
 	.probe = vio_bus_probe,
 	.remove = vio_bus_remove,
-	.pm = GENERIC_SUBSYS_PM_OPS,
 };
 
 /**
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index 5a5b154..10bdd79 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -276,28 +276,3 @@ void pm_generic_complete(struct device *dev)
 	pm_runtime_idle(dev);
 }
 #endif /* CONFIG_PM_SLEEP */
-
-struct dev_pm_ops generic_subsys_pm_ops = {
-#ifdef CONFIG_PM_SLEEP
-	.prepare = pm_generic_prepare,
-	.suspend = pm_generic_suspend,
-	.suspend_noirq = pm_generic_suspend_noirq,
-	.resume = pm_generic_resume,
-	.resume_noirq = pm_generic_resume_noirq,
-	.freeze = pm_generic_freeze,
-	.freeze_noirq = pm_generic_freeze_noirq,
-	.thaw = pm_generic_thaw,
-	.thaw_noirq = pm_generic_thaw_noirq,
-	.poweroff = pm_generic_poweroff,
-	.poweroff_noirq = pm_generic_poweroff_noirq,
-	.restore = pm_generic_restore,
-	.restore_noirq = pm_generic_restore_noirq,
-	.complete = pm_generic_complete,
-#endif
-#ifdef CONFIG_PM_RUNTIME
-	.runtime_suspend = pm_generic_runtime_suspend,
-	.runtime_resume = pm_generic_runtime_resume,
-	.runtime_idle = pm_generic_runtime_idle,
-#endif
-};
-EXPORT_SYMBOL_GPL(generic_subsys_pm_ops);
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3f3ed83..21e04dd 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -300,19 +300,6 @@ const struct dev_pm_ops name = { \
 	SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \
 }
 
-/*
- * Use this for subsystems (bus types, device types, device classes) that don't
- * need any special suspend/resume handling in addition to invoking the PM
- * callbacks provided by device drivers supporting both the system sleep PM and
- * runtime PM, make the pm member point to generic_subsys_pm_ops.
- */
-#ifdef CONFIG_PM
-extern struct dev_pm_ops generic_subsys_pm_ops;
-#define GENERIC_SUBSYS_PM_OPS	(&generic_subsys_pm_ops)
-#else
-#define GENERIC_SUBSYS_PM_OPS	NULL
-#endif
-
 /**
  * PM_EVENT_ messages
  *
-- 
cgit v0.10.2