From 50fb4f7fc907efff65eadb0b74387a9ffed6e849 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:22 -0800 Subject: freezer: fix current->state restoration race in refrigerator() refrigerator() saves current->state before entering frozen state and restores it before returning using __set_current_state(); however, this is racy, for example, please consider the following sequence. set_current_state(TASK_INTERRUPTIBLE); try_to_freeze(); if (kthread_should_stop()) break; schedule(); If kthread_stop() races with ->state restoration, the restoration can restore ->state to TASK_INTERRUPTIBLE after kthread_stop() sets it to TASK_RUNNING but kthread_should_stop() may still see zero ->should_stop because there's no memory barrier between restoring TASK_INTERRUPTIBLE and kthread_should_stop() test. This isn't restricted to kthread_should_stop(). current->state is often used in memory barrier based synchronization and silently restoring it w/o mb breaks them. Use set_current_state() instead. Signed-off-by: Tejun Heo diff --git a/kernel/freezer.c b/kernel/freezer.c index 7be56c5..3f46010 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -58,7 +58,13 @@ void refrigerator(void) current->flags &= ~PF_FREEZING; pr_debug("%s left refrigerator\n", current->comm); - __set_current_state(save); + + /* + * Restore saved task state before returning. The mb'd version + * needs to be used; otherwise, it might silently break + * synchronization which depends on ordered task state change. + */ + set_current_state(save); } EXPORT_SYMBOL(refrigerator); -- cgit v0.10.2 From 3a7cbd50f74907580eb47a8d08e1f29741b81abf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:22 -0800 Subject: freezer: don't unnecessarily set PF_NOFREEZE explicitly Some drivers set PF_NOFREEZE in their kthread functions which is completely unnecessary and racy - some part of freezer code doesn't consider cases where PF_NOFREEZE is set asynchronous to freezer operations. In general, there's no reason to allow setting PF_NOFREEZE explicitly. Remove them and change the documentation to note that setting PF_NOFREEZE directly isn't allowed. -v2: Dropped change to twl4030-irq.c as it no longer uses PF_NOFREEZE. Signed-off-by: Tejun Heo Acked-by: "Gustavo F. Padovan" Acked-by: Samuel Ortiz Cc: Marcel Holtmann Cc: wwang diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt index 316c2ba..587e082 100644 --- a/Documentation/power/freezing-of-tasks.txt +++ b/Documentation/power/freezing-of-tasks.txt @@ -67,7 +67,7 @@ III. Which kernel threads are freezable? Kernel threads are not freezable by default. However, a kernel thread may clear PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE -directly is strongly discouraged). From this point it is regarded as freezable +directly is not allowed). From this point it is regarded as freezable and must call try_to_freeze() in a suitable place. IV. Why do we do that? diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c index a88a78c..6c3defa 100644 --- a/drivers/bluetooth/btmrvl_main.c +++ b/drivers/bluetooth/btmrvl_main.c @@ -475,8 +475,6 @@ static int btmrvl_service_main_thread(void *data) init_waitqueue_entry(&wait, current); - current->flags |= PF_NOFREEZE; - for (;;) { add_wait_queue(&thread->wait_q, &wait); diff --git a/drivers/mfd/twl6030-irq.c b/drivers/mfd/twl6030-irq.c index 3eee45f..c6b456a 100644 --- a/drivers/mfd/twl6030-irq.c +++ b/drivers/mfd/twl6030-irq.c @@ -138,8 +138,6 @@ static int twl6030_irq_thread(void *data) static const unsigned max_i2c_errors = 100; int ret; - current->flags |= PF_NOFREEZE; - while (!kthread_should_stop()) { int i; union { diff --git a/drivers/staging/rts_pstor/rtsx.c b/drivers/staging/rts_pstor/rtsx.c index 480b0ed..8a7803c 100644 --- a/drivers/staging/rts_pstor/rtsx.c +++ b/drivers/staging/rts_pstor/rtsx.c @@ -466,8 +466,6 @@ static int rtsx_control_thread(void *__dev) struct rtsx_chip *chip = dev->chip; struct Scsi_Host *host = rtsx_to_host(dev); - current->flags |= PF_NOFREEZE; - for (;;) { if (wait_for_completion_interruptible(&dev->cmnd_ready)) break; -- cgit v0.10.2 From a0acae0e886d44bd5ce6d2f173c1ace0fcf0d9f6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:22 -0800 Subject: freezer: unexport refrigerator() and update try_to_freeze() slightly There is no reason to export two functions for entering the refrigerator. Calling refrigerator() instead of try_to_freeze() doesn't save anything noticeable or removes any race condition. * Rename refrigerator() to __refrigerator() and make it return bool indicating whether it scheduled out for freezing. * Update try_to_freeze() to return bool and relay the return value of __refrigerator() if freezing(). * Convert all refrigerator() users to try_to_freeze(). * Update documentation accordingly. * While at it, add might_sleep() to try_to_freeze(). Signed-off-by: Tejun Heo Cc: Samuel Ortiz Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Steven Whitehouse Cc: Andrew Morton Cc: Jan Kara Cc: KONISHI Ryusuke Cc: Christoph Hellwig diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt index 587e082..3ab9fbd 100644 --- a/Documentation/power/freezing-of-tasks.txt +++ b/Documentation/power/freezing-of-tasks.txt @@ -21,7 +21,7 @@ freeze_processes() (defined in kernel/power/process.c) is called. It executes try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and either wakes them up, if they are kernel threads, or sends fake signals to them, if they are user space processes. A task that has TIF_FREEZE set, should react -to it by calling the function called refrigerator() (defined in +to it by calling the function called __refrigerator() (defined in kernel/freezer.c), which sets the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is cleared for it. Then, we say that the task is 'frozen' and therefore the set of functions @@ -29,10 +29,10 @@ handling this mechanism is referred to as 'the freezer' (these functions are defined in kernel/power/process.c, kernel/freezer.c & include/linux/freezer.h). User space processes are generally frozen before kernel threads. -It is not recommended to call refrigerator() directly. Instead, it is -recommended to use the try_to_freeze() function (defined in -include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the -task enter refrigerator() if the flag is set. +__refrigerator() must not be called directly. Instead, use the +try_to_freeze() function (defined in include/linux/freezer.h), that checks +the task's TIF_FREEZE flag and makes the task enter __refrigerator() if the +flag is set. For user space processes try_to_freeze() is called automatically from the signal-handling code, but the freezable kernel threads need to call it @@ -61,7 +61,7 @@ wait_event_freezable() and wait_event_freezable_timeout() macros. After the system memory state has been restored from a hibernation image and devices have been reinitialized, the function thaw_processes() is called in order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that -have been frozen leave refrigerator() and continue running. +have been frozen leave __refrigerator() and continue running. III. Which kernel threads are freezable? diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 41c96b3..e880c79 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -750,7 +750,7 @@ static int stir_transmit_thread(void *arg) write_reg(stir, REG_CTRL1, CTRL1_TXPWD|CTRL1_RXPWD); - refrigerator(); + try_to_freeze(); if (change_speed(stir, stir->speed)) break; diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 7ec1409..98ab240 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -340,7 +340,7 @@ again: if (freezing(current)) { worker->working = 0; spin_unlock_irq(&worker->lock); - refrigerator(); + try_to_freeze(); } else { spin_unlock_irq(&worker->lock); if (!kthread_should_stop()) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62afe5c..622654f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1579,9 +1579,7 @@ static int cleaner_kthread(void *arg) btrfs_run_defrag_inodes(root->fs_info); } - if (freezing(current)) { - refrigerator(); - } else { + if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop()) schedule(); @@ -1635,9 +1633,7 @@ sleep: wake_up_process(root->fs_info->cleaner_kthread); mutex_unlock(&root->fs_info->transaction_kthread_mutex); - if (freezing(current)) { - refrigerator(); - } else { + if (!try_to_freeze()) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop() && !btrfs_transaction_blocked(root->fs_info)) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9953d80..877350e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2882,8 +2882,7 @@ cont_thread: } mutex_unlock(&eli->li_list_mtx); - if (freezing(current)) - refrigerator(); + try_to_freeze(); cur = jiffies; if ((time_after_eq(cur, next_wakeup)) || diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 5986464..8154d42 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -951,8 +951,8 @@ int gfs2_logd(void *data) wake_up(&sdp->sd_log_waitq); t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; - if (freezing(current)) - refrigerator(); + + try_to_freeze(); do { prepare_to_wait(&sdp->sd_logd_waitq, &wait, diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 7e528dc..d49669e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1427,8 +1427,8 @@ int gfs2_quotad(void *data) /* Check for & recover partially truncated inodes */ quotad_check_trunc_list(sdp); - if (freezing(current)) - refrigerator(); + try_to_freeze(); + t = min(quotad_timeo, statfs_timeo); prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index fea8dd6..a96cff0 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -166,7 +166,7 @@ loop: */ jbd_debug(1, "Now suspending kjournald\n"); spin_unlock(&journal->j_state_lock); - refrigerator(); + try_to_freeze(); spin_lock(&journal->j_state_lock); } else { /* diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0fa0123..c0a5f9f 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -173,7 +173,7 @@ loop: */ jbd_debug(1, "Now suspending kjournald2\n"); write_unlock(&journal->j_state_lock); - refrigerator(); + try_to_freeze(); write_lock(&journal->j_state_lock); } else { /* diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index cc5f811..2eb952c 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2349,7 +2349,7 @@ int jfsIOWait(void *arg) if (freezing(current)) { spin_unlock_irq(&log_redrive_lock); - refrigerator(); + try_to_freeze(); } else { set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&log_redrive_lock); diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index af96060..bb8b661 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2800,7 +2800,7 @@ int jfs_lazycommit(void *arg) if (freezing(current)) { LAZY_UNLOCK(flags); - refrigerator(); + try_to_freeze(); } else { DECLARE_WAITQUEUE(wq, current); @@ -2994,7 +2994,7 @@ int jfs_sync(void *arg) if (freezing(current)) { TXN_UNLOCK(); - refrigerator(); + try_to_freeze(); } else { set_current_state(TASK_INTERRUPTIBLE); TXN_UNLOCK(); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index bb24ab6..0e72ad6 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2470,7 +2470,7 @@ static int nilfs_segctor_thread(void *arg) if (freezing(current)) { spin_unlock(&sci->sc_state_lock); - refrigerator(); + try_to_freeze(); spin_lock(&sci->sc_state_lock); } else { DEFINE_WAIT(wait); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cf0ac05..0188299 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1703,7 +1703,7 @@ xfsbufd( if (unlikely(freezing(current))) { set_bit(XBT_FORCE_SLEEP, &target->bt_flags); - refrigerator(); + try_to_freeze(); } else { clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); } diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a5386e3..7a9427e 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,18 +47,17 @@ static inline bool should_send_signal(struct task_struct *p) /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); -extern void refrigerator(void); +extern bool __refrigerator(void); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); -static inline int try_to_freeze(void) +static inline bool try_to_freeze(void) { - if (freezing(current)) { - refrigerator(); - return 1; - } else - return 0; + might_sleep(); + if (likely(!freezing(current))) + return false; + return __refrigerator(); } extern bool freeze_task(struct task_struct *p, bool sig_only); @@ -181,12 +180,12 @@ static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } -static inline void refrigerator(void) {} +static inline bool __refrigerator(void) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} -static inline int try_to_freeze(void) { return 0; } +static inline bool try_to_freeze(void) { return false; } static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} diff --git a/kernel/freezer.c b/kernel/freezer.c index 3f46010..732f14f 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -23,10 +23,11 @@ static inline void frozen_process(void) } /* Refrigerator is place where frozen processes are stored :-). */ -void refrigerator(void) +bool __refrigerator(void) { /* Hmm, should we be allowed to suspend when there are realtime processes around? */ + bool was_frozen = false; long save; task_lock(current); @@ -35,7 +36,7 @@ void refrigerator(void) task_unlock(current); } else { task_unlock(current); - return; + return was_frozen; } save = current->state; pr_debug("%s entered refrigerator\n", current->comm); @@ -51,6 +52,7 @@ void refrigerator(void) set_current_state(TASK_UNINTERRUPTIBLE); if (!frozen(current)) break; + was_frozen = true; schedule(); } @@ -65,8 +67,10 @@ void refrigerator(void) * synchronization which depends on ordered task state change. */ set_current_state(save); + + return was_frozen; } -EXPORT_SYMBOL(refrigerator); +EXPORT_SYMBOL(__refrigerator); static void fake_signal_wake_up(struct task_struct *p) { -- cgit v0.10.2 From 8a32c441c1609f80e55df75422324a1151208f40 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: implement and use kthread_freezable_should_stop() Writeback and thinkpad_acpi have been using thaw_process() to prevent deadlock between the freezer and kthread_stop(); unfortunately, this is inherently racy - nothing prevents freezing from happening between thaw_process() and kthread_stop(). This patch implements kthread_freezable_should_stop() which enters refrigerator if necessary but is guaranteed to return if kthread_stop() is invoked. Both thaw_process() users are converted to use the new function. Note that this deadlock condition exists for many of freezable kthreads. They need to be converted to use the new should_stop or freezable workqueue. Tested with synthetic test case. Signed-off-by: Tejun Heo Acked-by: Henrique de Moraes Holschuh Cc: Jens Axboe Cc: Oleg Nesterov diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 7b82868..4b11fc9 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2456,8 +2456,9 @@ static int hotkey_kthread(void *data) u32 poll_mask, event_mask; unsigned int si, so; unsigned long t; - unsigned int change_detector, must_reset; + unsigned int change_detector; unsigned int poll_freq; + bool was_frozen; mutex_lock(&hotkey_thread_mutex); @@ -2488,14 +2489,14 @@ static int hotkey_kthread(void *data) t = 100; /* should never happen... */ } t = msleep_interruptible(t); - if (unlikely(kthread_should_stop())) + if (unlikely(kthread_freezable_should_stop(&was_frozen))) break; - must_reset = try_to_freeze(); - if (t > 0 && !must_reset) + + if (t > 0 && !was_frozen) continue; mutex_lock(&hotkey_thread_data_mutex); - if (must_reset || hotkey_config_change != change_detector) { + if (was_frozen || hotkey_config_change != change_detector) { /* forget old state on thaw or config change */ si = so; t = 0; @@ -2528,10 +2529,6 @@ exit: static void hotkey_poll_stop_sync(void) { if (tpacpi_hotkey_task) { - if (frozen(tpacpi_hotkey_task) || - freezing(tpacpi_hotkey_task)) - thaw_process(tpacpi_hotkey_task); - kthread_stop(tpacpi_hotkey_task); tpacpi_hotkey_task = NULL; mutex_lock(&hotkey_thread_mutex); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 73c3992..271fde5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -947,7 +947,7 @@ int bdi_writeback_thread(void *data) trace_writeback_thread_start(bdi); - while (!kthread_should_stop()) { + while (!kthread_freezable_should_stop(NULL)) { /* * Remove own delayed wake-up timer, since we are already awake * and we'll take care of the preriodic write-back. @@ -977,8 +977,6 @@ int bdi_writeback_thread(void *data) */ schedule(); } - - try_to_freeze(); } /* Flush any work that raced with us exiting */ diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 7a9427e..d02b784 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -47,7 +47,7 @@ static inline bool should_send_signal(struct task_struct *p) /* Takes and releases task alloc lock using task_lock() */ extern int thaw_process(struct task_struct *p); -extern bool __refrigerator(void); +extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); extern int freeze_kernel_threads(void); extern void thaw_processes(void); @@ -57,7 +57,7 @@ static inline bool try_to_freeze(void) might_sleep(); if (likely(!freezing(current))) return false; - return __refrigerator(); + return __refrigerator(false); } extern bool freeze_task(struct task_struct *p, bool sig_only); @@ -180,7 +180,7 @@ static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} static inline int thaw_process(struct task_struct *p) { return 1; } -static inline bool __refrigerator(void) { return false; } +static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } static inline int freeze_kernel_threads(void) { return -ENOSYS; } static inline void thaw_processes(void) {} diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 5cac19b..0714b24 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -35,6 +35,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void kthread_bind(struct task_struct *k, unsigned int cpu); int kthread_stop(struct task_struct *k); int kthread_should_stop(void); +bool kthread_freezable_should_stop(bool *was_frozen); void *kthread_data(struct task_struct *k); int kthreadd(void *unused); diff --git a/kernel/freezer.c b/kernel/freezer.c index 732f14f..b83c30e 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -9,6 +9,7 @@ #include #include #include +#include /* * freezing is complete, mark current process as frozen @@ -23,7 +24,7 @@ static inline void frozen_process(void) } /* Refrigerator is place where frozen processes are stored :-). */ -bool __refrigerator(void) +bool __refrigerator(bool check_kthr_stop) { /* Hmm, should we be allowed to suspend when there are realtime processes around? */ @@ -50,7 +51,8 @@ bool __refrigerator(void) for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!frozen(current)) + if (!frozen(current) || + (check_kthr_stop && kthread_should_stop())) break; was_frozen = true; schedule(); diff --git a/kernel/kthread.c b/kernel/kthread.c index b6d216a..1c36dea 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -59,6 +59,31 @@ int kthread_should_stop(void) EXPORT_SYMBOL(kthread_should_stop); /** + * kthread_freezable_should_stop - should this freezable kthread return now? + * @was_frozen: optional out parameter, indicates whether %current was frozen + * + * kthread_should_stop() for freezable kthreads, which will enter + * refrigerator if necessary. This function is safe from kthread_stop() / + * freezer deadlock and freezable kthreads should use this function instead + * of calling try_to_freeze() directly. + */ +bool kthread_freezable_should_stop(bool *was_frozen) +{ + bool frozen = false; + + might_sleep(); + + if (unlikely(freezing(current))) + frozen = __refrigerator(true); + + if (was_frozen) + *was_frozen = frozen; + + return kthread_should_stop(); +} +EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); + +/** * kthread_data - return data value specified on kthread creation * @task: kthread task in question * diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 71034f4..7ba8fea 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -600,14 +600,10 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi) /* * Finally, kill the kernel thread. We don't need to be RCU - * safe anymore, since the bdi is gone from visibility. Force - * unfreeze of the thread before calling kthread_stop(), otherwise - * it would never exet if it is currently stuck in the refrigerator. + * safe anymore, since the bdi is gone from visibility. */ - if (bdi->wb.task) { - thaw_process(bdi->wb.task); + if (bdi->wb.task) kthread_stop(bdi->wb.task); - } } /* -- cgit v0.10.2 From a5be2d0d1a8746e7be5210e3d6b904455000443c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: rename thaw_process() to __thaw_task() and simplify the implementation thaw_process() now has only internal users - system and cgroup freezers. Remove the unnecessary return value, rename, unexport and collapse __thaw_process() into it. This will help further updates to the freezer code. -v3: oom_kill grew a use of thaw_process() while this patch was pending. Convert it to use __thaw_task() for now. In the longer term, this should be handled by allowing tasks to die if killed even if it's frozen. -v2: minor style update as suggested by Matt. Signed-off-by: Tejun Heo Cc: Paul Menage Cc: Matt Helsley diff --git a/include/linux/freezer.h b/include/linux/freezer.h index d02b784..ba4f512 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -45,7 +45,7 @@ static inline bool should_send_signal(struct task_struct *p) } /* Takes and releases task alloc lock using task_lock() */ -extern int thaw_process(struct task_struct *p); +extern void __thaw_task(struct task_struct *t); extern bool __refrigerator(bool check_kthr_stop); extern int freeze_processes(void); @@ -178,7 +178,6 @@ static inline int frozen(struct task_struct *p) { return 0; } static inline int freezing(struct task_struct *p) { return 0; } static inline void set_freeze_flag(struct task_struct *p) {} static inline void clear_freeze_flag(struct task_struct *p) {} -static inline int thaw_process(struct task_struct *p) { return 1; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 5e828a2..a6d405a 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -130,7 +130,7 @@ struct cgroup_subsys freezer_subsys; * write_lock css_set_lock (cgroup iterator start) * task->alloc_lock * read_lock css_set_lock (cgroup iterator start) - * task->alloc_lock (inside thaw_process(), prevents race with refrigerator()) + * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) * sighand->siglock */ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, @@ -300,9 +300,8 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) struct task_struct *task; cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) { - thaw_process(task); - } + while ((task = cgroup_iter_next(cgroup, &it))) + __thaw_task(task); cgroup_iter_end(cgroup, &it); freezer->state = CGROUP_THAWED; diff --git a/kernel/freezer.c b/kernel/freezer.c index b83c30e..c851d58 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -145,18 +145,8 @@ void cancel_freezing(struct task_struct *p) } } -static int __thaw_process(struct task_struct *p) -{ - if (frozen(p)) { - p->flags &= ~PF_FROZEN; - return 1; - } - clear_freeze_flag(p); - return 0; -} - /* - * Wake up a frozen process + * Wake up a frozen task * * task_lock() is needed to prevent the race with refrigerator() which may * occur if the freezing of tasks fails. Namely, without the lock, if the @@ -164,15 +154,18 @@ static int __thaw_process(struct task_struct *p) * refrigerator() could call frozen_process(), in which case the task would be * frozen and no one would thaw it. */ -int thaw_process(struct task_struct *p) +void __thaw_task(struct task_struct *p) { + bool was_frozen; + task_lock(p); - if (__thaw_process(p) == 1) { - task_unlock(p); - wake_up_process(p); - return 1; - } + was_frozen = frozen(p); + if (was_frozen) + p->flags &= ~PF_FROZEN; + else + clear_freeze_flag(p); task_unlock(p); - return 0; + + if (was_frozen) + wake_up_process(p); } -EXPORT_SYMBOL(thaw_process); diff --git a/kernel/power/process.c b/kernel/power/process.c index addbbe5..fe27872 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -186,7 +186,7 @@ static void thaw_tasks(bool nosig_only) if (cgroup_freezing_or_frozen(p)) continue; - thaw_process(p); + __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 76f2c5a..3134ee2 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -328,7 +328,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, */ if (test_tsk_thread_flag(p, TIF_MEMDIE)) { if (unlikely(frozen(p))) - thaw_process(p); + __thaw_task(p); return ERR_PTR(-1UL); } if (!p->mm) -- cgit v0.10.2 From a585042f7b933539a0b6bc63650c2d49ffb2e55d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: remove racy clear_freeze_flag() and set PF_NOFREEZE on dead tasks clear_freeze_flag() in exit_mm() is racy. Freezing can start afterwards. Remove it. Skipping freezer for exiting task will be properly implemented later. Also, freezable() was testing exit_state directly to make system freezer ignore dead tasks. Let the exiting task set PF_NOFREEZE after entering TASK_DEAD instead. Signed-off-by: Tejun Heo Cc: Oleg Nesterov diff --git a/kernel/exit.c b/kernel/exit.c index d0b7d98..95a4141 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -679,8 +679,6 @@ static void exit_mm(struct task_struct * tsk) tsk->mm = NULL; up_read(&mm->mmap_sem); enter_lazy_tlb(mm, current); - /* We don't want this task to be frozen prematurely */ - clear_freeze_flag(tsk); task_unlock(tsk); mm_update_next_owner(mm); mmput(mm); @@ -1040,6 +1038,7 @@ NORET_TYPE void do_exit(long code) exit_rcu(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; + tsk->flags |= PF_NOFREEZE; /* tell freezer to ignore us */ schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/power/process.c b/kernel/power/process.c index fe27872..23822dc 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -25,8 +25,7 @@ static inline int freezable(struct task_struct * p) { if ((p == current) || - (p->flags & PF_NOFREEZE) || - (p->exit_state != 0)) + (p->flags & PF_NOFREEZE)) return 0; return 1; } -- cgit v0.10.2 From 6cd8dedcdd8e8de01391a7cf25f0b2afeb24f8f4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:23 -0800 Subject: freezer: don't distinguish nosig tasks on thaw There's no point in thawing nosig tasks before others. There's no ordering requirement between the two groups on thaw, which the staged thawing can't guarantee anyway. Simplify thaw_processes() by removing the distinction and collapsing thaw_tasks() into thaw_processes(). This will help further updates to freezer. Signed-off-by: Tejun Heo diff --git a/kernel/power/process.c b/kernel/power/process.c index 23822dc..9db048f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -170,34 +170,28 @@ int freeze_kernel_threads(void) return error; } -static void thaw_tasks(bool nosig_only) +void thaw_processes(void) { struct task_struct *g, *p; + oom_killer_enable(); + + printk("Restarting tasks ... "); + + thaw_workqueues(); + read_lock(&tasklist_lock); do_each_thread(g, p) { if (!freezable(p)) continue; - if (nosig_only && should_send_signal(p)) - continue; - if (cgroup_freezing_or_frozen(p)) continue; __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); -} - -void thaw_processes(void) -{ - oom_killer_enable(); - printk("Restarting tasks ... "); - thaw_workqueues(); - thaw_tasks(true); - thaw_tasks(false); schedule(); printk("done.\n"); } -- cgit v0.10.2 From 0c9af09262864a2744091ee94c98c4a8fd60c98b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: use dedicated lock instead of task_lock() + memory barrier Freezer synchronization is needlessly complicated - it's by no means a hot path and the priority is staying unintrusive and safe. This patch makes it simply use a dedicated lock instead of piggy-backing on task_lock() and playing with memory barriers. On the failure path of try_to_freeze_tasks(), locking is moved from it to cancel_freezing(). This makes the frozen() test racy but the race here is a non-issue as the warning is printed for tasks which failed to enter frozen for 20 seconds and race on PF_FROZEN at the last moment doesn't change anything. This simplifies freezer implementation and eases further changes including some race fixes. Signed-off-by: Tejun Heo diff --git a/kernel/freezer.c b/kernel/freezer.c index c851d58..4130e48 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -11,17 +11,8 @@ #include #include -/* - * freezing is complete, mark current process as frozen - */ -static inline void frozen_process(void) -{ - if (!unlikely(current->flags & PF_NOFREEZE)) { - current->flags |= PF_FROZEN; - smp_wmb(); - } - clear_freeze_flag(current); -} +/* protects freezing and frozen transitions */ +static DEFINE_SPINLOCK(freezer_lock); /* Refrigerator is place where frozen processes are stored :-). */ bool __refrigerator(bool check_kthr_stop) @@ -31,14 +22,16 @@ bool __refrigerator(bool check_kthr_stop) bool was_frozen = false; long save; - task_lock(current); - if (freezing(current)) { - frozen_process(); - task_unlock(current); - } else { - task_unlock(current); + spin_lock_irq(&freezer_lock); + if (!freezing(current)) { + spin_unlock_irq(&freezer_lock); return was_frozen; } + if (!(current->flags & PF_NOFREEZE)) + current->flags |= PF_FROZEN; + clear_freeze_flag(current); + spin_unlock_irq(&freezer_lock); + save = current->state; pr_debug("%s entered refrigerator\n", current->comm); @@ -99,21 +92,18 @@ static void fake_signal_wake_up(struct task_struct *p) */ bool freeze_task(struct task_struct *p, bool sig_only) { - /* - * We first check if the task is freezing and next if it has already - * been frozen to avoid the race with frozen_process() which first marks - * the task as frozen and next clears its TIF_FREEZE. - */ - if (!freezing(p)) { - smp_rmb(); - if (frozen(p)) - return false; - - if (!sig_only || should_send_signal(p)) - set_freeze_flag(p); - else - return false; - } + unsigned long flags; + bool ret = false; + + spin_lock_irqsave(&freezer_lock, flags); + + if (sig_only && !should_send_signal(p)) + goto out_unlock; + + if (frozen(p)) + goto out_unlock; + + set_freeze_flag(p); if (should_send_signal(p)) { fake_signal_wake_up(p); @@ -123,26 +113,28 @@ bool freeze_task(struct task_struct *p, bool sig_only) * TASK_RUNNING transition can't race with task state * testing in try_to_freeze_tasks(). */ - } else if (sig_only) { - return false; } else { wake_up_state(p, TASK_INTERRUPTIBLE); } - - return true; + ret = true; +out_unlock: + spin_unlock_irqrestore(&freezer_lock, flags); + return ret; } void cancel_freezing(struct task_struct *p) { unsigned long flags; + spin_lock_irqsave(&freezer_lock, flags); if (freezing(p)) { pr_debug(" clean up: %s\n", p->comm); clear_freeze_flag(p); - spin_lock_irqsave(&p->sighand->siglock, flags); + spin_lock(&p->sighand->siglock); recalc_sigpending_and_wake(p); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + spin_unlock(&p->sighand->siglock); } + spin_unlock_irqrestore(&freezer_lock, flags); } /* @@ -156,16 +148,14 @@ void cancel_freezing(struct task_struct *p) */ void __thaw_task(struct task_struct *p) { - bool was_frozen; + unsigned long flags; - task_lock(p); - was_frozen = frozen(p); - if (was_frozen) + spin_lock_irqsave(&freezer_lock, flags); + if (frozen(p)) { p->flags &= ~PF_FROZEN; - else - clear_freeze_flag(p); - task_unlock(p); - - if (was_frozen) wake_up_process(p); + } else { + clear_freeze_flag(p); + } + spin_unlock_irqrestore(&freezer_lock, flags); } diff --git a/kernel/power/process.c b/kernel/power/process.c index 9db048f..bd420ca 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -118,11 +118,9 @@ static int try_to_freeze_tasks(bool sig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - task_lock(p); if (!wakeup && freezing(p) && !freezer_should_skip(p)) sched_show_task(p); cancel_freezing(p); - task_unlock(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else { -- cgit v0.10.2 From 6907483b4e803a20f0b48cc9afa3817420ce61c5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: make freezing indicate freeze condition in effect Currently freezing (TIF_FREEZE) and frozen (PF_FROZEN) states are interlocked - freezing is set to request freeze and when the task actually freezes, it clears freezing and sets frozen. This interlocking makes things more complex than necessary - freezing doesn't mean there's freezing condition in effect and frozen doesn't match the task actually entering and leaving frozen state (it's cleared by the thawing task). This patch makes freezing indicate that freeze condition is in effect. A task enters and stays frozen if freezing. This makes PF_FROZEN manipulation done only by the task itself and prevents wakeup from __thaw_task() leaking outside of refrigerator. The only place which needs to tell freezing && !frozen is try_to_freeze_task() to whine about tasks which don't enter frozen. It's updated to test the condition explicitly. With the change, frozen() state my linger after __thaw_task() until the task wakes up and exits fridge. This can trigger BUG_ON() in update_if_frozen(). Work it around by testing freezing() && frozen() instead of frozen(). -v2: Oleg pointed out missing re-check of freezing() when trying to clear FROZEN and possible spurious BUG_ON() trigger in update_if_frozen(). Both fixed. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: Paul Menage diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index a6d405a..cd27b08 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -231,7 +231,7 @@ static void update_if_frozen(struct cgroup *cgroup, cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { ntotal++; - if (frozen(task)) + if (freezing(task) && frozen(task)) nfrozen++; } diff --git a/kernel/freezer.c b/kernel/freezer.c index 4130e48..a8822be 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -22,14 +22,19 @@ bool __refrigerator(bool check_kthr_stop) bool was_frozen = false; long save; + /* + * Enter FROZEN. If NOFREEZE, schedule immediate thawing by + * clearing freezing. + */ spin_lock_irq(&freezer_lock); +repeat: if (!freezing(current)) { spin_unlock_irq(&freezer_lock); return was_frozen; } - if (!(current->flags & PF_NOFREEZE)) - current->flags |= PF_FROZEN; - clear_freeze_flag(current); + if (current->flags & PF_NOFREEZE) + clear_freeze_flag(current); + current->flags |= PF_FROZEN; spin_unlock_irq(&freezer_lock); save = current->state; @@ -44,7 +49,7 @@ bool __refrigerator(bool check_kthr_stop) for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!frozen(current) || + if (!freezing(current) || (check_kthr_stop && kthread_should_stop())) break; was_frozen = true; @@ -54,6 +59,13 @@ bool __refrigerator(bool check_kthr_stop) /* Remove the accounting blocker */ current->flags &= ~PF_FREEZING; + /* leave FROZEN */ + spin_lock_irq(&freezer_lock); + if (freezing(current)) + goto repeat; + current->flags &= ~PF_FROZEN; + spin_unlock_irq(&freezer_lock); + pr_debug("%s left refrigerator\n", current->comm); /* @@ -137,25 +149,19 @@ void cancel_freezing(struct task_struct *p) spin_unlock_irqrestore(&freezer_lock, flags); } -/* - * Wake up a frozen task - * - * task_lock() is needed to prevent the race with refrigerator() which may - * occur if the freezing of tasks fails. Namely, without the lock, if the - * freezing of tasks failed, thaw_tasks() might have run before a task in - * refrigerator() could call frozen_process(), in which case the task would be - * frozen and no one would thaw it. - */ void __thaw_task(struct task_struct *p) { unsigned long flags; + /* + * Clear freezing and kick @p if FROZEN. Clearing is guaranteed to + * be visible to @p as waking up implies wmb. Waking up inside + * freezer_lock also prevents wakeups from leaking outside + * refrigerator. + */ spin_lock_irqsave(&freezer_lock, flags); - if (frozen(p)) { - p->flags &= ~PF_FROZEN; + clear_freeze_flag(p); + if (frozen(p)) wake_up_process(p); - } else { - clear_freeze_flag(p); - } spin_unlock_irqrestore(&freezer_lock, flags); } diff --git a/kernel/power/process.c b/kernel/power/process.c index bd420ca..e6e2739 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -118,7 +118,8 @@ static int try_to_freeze_tasks(bool sig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!wakeup && freezing(p) && !freezer_should_skip(p)) + if (!wakeup && !freezer_should_skip(p) && + freezing(p) && !frozen(p)) sched_show_task(p); cancel_freezing(p); } while_each_thread(g, p); -- cgit v0.10.2 From 85f1d476653f52c97ca75466b2494e67c1cbd25d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: test freezable conditions while holding freezer_lock try_to_freeze_tasks() and thaw_processes() use freezable() and frozen() as preliminary tests before initiating operations on a task. These are done without any synchronization and hinder with synchronization cleanup without any real performance benefits. In try_to_freeze_tasks(), open code self test and move PF_NOFREEZE and frozen() tests inside freezer_lock in freeze_task(). thaw_processes() can simply drop freezable() test as frozen() test in __thaw_task() is enough. Note: This used to be a part of larger patch to fix set_freezable() race. Separated out to satisfy ordering among dependent fixes. Signed-off-by: Tejun Heo Cc: Oleg Nesterov diff --git a/kernel/freezer.c b/kernel/freezer.c index a8822be..a257ecd 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -109,7 +109,8 @@ bool freeze_task(struct task_struct *p, bool sig_only) spin_lock_irqsave(&freezer_lock, flags); - if (sig_only && !should_send_signal(p)) + if ((p->flags & PF_NOFREEZE) || + (sig_only && !should_send_signal(p))) goto out_unlock; if (frozen(p)) diff --git a/kernel/power/process.c b/kernel/power/process.c index e6e2739..e59676f 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,14 +22,6 @@ */ #define TIMEOUT (20 * HZ) -static inline int freezable(struct task_struct * p) -{ - if ((p == current) || - (p->flags & PF_NOFREEZE)) - return 0; - return 1; -} - static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; @@ -52,10 +44,7 @@ static int try_to_freeze_tasks(bool sig_only) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (frozen(p) || !freezable(p)) - continue; - - if (!freeze_task(p, sig_only)) + if (p == current || !freeze_task(p, sig_only)) continue; /* @@ -181,9 +170,6 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezable(p)) - continue; - if (cgroup_freezing_or_frozen(p)) continue; -- cgit v0.10.2 From 376fede80e74d98b49d1ba9ac18f23c9fd026ddd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: kill PF_FREEZING With the previous changes, there's no meaningful difference between PF_FREEZING and PF_FROZEN. Remove PF_FREEZING and use PF_FROZEN instead in task_contributes_to_load(). Signed-off-by: Tejun Heo diff --git a/include/linux/sched.h b/include/linux/sched.h index 68daf4f..d12bd03 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -220,7 +220,7 @@ extern char ___assert_task_state[1 - 2*!!( ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FREEZING) == 0) + (task->flags & PF_FROZEN) == 0) #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -1773,7 +1773,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ -#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ diff --git a/kernel/freezer.c b/kernel/freezer.c index a257ecd..b8b5621 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -44,9 +44,6 @@ repeat: recalc_sigpending(); /* We sent fake signal, clean it up */ spin_unlock_irq(¤t->sighand->siglock); - /* prevent accounting of that task to load */ - current->flags |= PF_FREEZING; - for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!freezing(current) || @@ -56,9 +53,6 @@ repeat: schedule(); } - /* Remove the accounting blocker */ - current->flags &= ~PF_FREEZING; - /* leave FROZEN */ spin_lock_irq(&freezer_lock); if (freezing(current)) -- cgit v0.10.2 From 03afed8bc296fa70186ba832c1126228bb992465 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:24 -0800 Subject: freezer: clean up freeze_processes() failure path freeze_processes() failure path is rather messy. Freezing is canceled for workqueues and tasks which aren't frozen yet but frozen tasks are left alone and should be thawed by the caller and of course some callers (xen and kexec) didn't do it. This patch updates __thaw_task() to handle cancelation correctly and makes freeze_processes() and freeze_kernel_threads() call thaw_processes() on failure instead so that the system is fully thawed on failure. Unnecessary [suspend_]thaw_processes() calls are removed from kernel/power/hibernate.c, suspend.c and user.c. While at it, restructure error checking if clause in suspend_prepare() to be less weird. -v2: Srivatsa spotted missing removal of suspend_thaw_processes() in suspend_prepare() and error in commit message. Updated. Signed-off-by: Tejun Heo Acked-by: Srivatsa S. Bhat diff --git a/include/linux/freezer.h b/include/linux/freezer.h index ba4f512..93f411a 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -61,7 +61,6 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p, bool sig_only); -extern void cancel_freezing(struct task_struct *p); #ifdef CONFIG_CGROUP_FREEZER extern int cgroup_freezing_or_frozen(struct task_struct *task); diff --git a/kernel/freezer.c b/kernel/freezer.c index b8b5621..11e32d4 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -129,21 +129,6 @@ out_unlock: return ret; } -void cancel_freezing(struct task_struct *p) -{ - unsigned long flags; - - spin_lock_irqsave(&freezer_lock, flags); - if (freezing(p)) { - pr_debug(" clean up: %s\n", p->comm); - clear_freeze_flag(p); - spin_lock(&p->sighand->siglock); - recalc_sigpending_and_wake(p); - spin_unlock(&p->sighand->siglock); - } - spin_unlock_irqrestore(&freezer_lock, flags); -} - void __thaw_task(struct task_struct *p) { unsigned long flags; @@ -153,10 +138,18 @@ void __thaw_task(struct task_struct *p) * be visible to @p as waking up implies wmb. Waking up inside * freezer_lock also prevents wakeups from leaking outside * refrigerator. + * + * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to + * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); clear_freeze_flag(p); - if (frozen(p)) + if (frozen(p)) { wake_up_process(p); + } else { + spin_lock(&p->sighand->siglock); + recalc_sigpending_and_wake(p); + spin_unlock(&p->sighand->siglock); + } spin_unlock_irqrestore(&freezer_lock, flags); } diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 196c0126..ba2319f 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -607,17 +607,6 @@ static void power_down(void) while(1); } -static int prepare_processes(void) -{ - int error = 0; - - if (freeze_processes()) { - error = -EBUSY; - thaw_processes(); - } - return error; -} - /** * hibernate - Carry out system hibernation, including saving the image. */ @@ -650,7 +639,7 @@ int hibernate(void) sys_sync(); printk("done.\n"); - error = prepare_processes(); + error = freeze_processes(); if (error) goto Finish; @@ -811,7 +800,7 @@ static int software_resume(void) goto close_finish; pr_debug("PM: Preparing processes for restore.\n"); - error = prepare_processes(); + error = freeze_processes(); if (error) { swsusp_close(FMODE_READ); goto Done; diff --git a/kernel/power/process.c b/kernel/power/process.c index e59676f..ce64383 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -91,11 +91,6 @@ static int try_to_freeze_tasks(bool sig_only) elapsed_csecs = elapsed_csecs64; if (todo) { - /* This does not unfreeze processes that are already frozen - * (we have slightly ugly calling convention in that respect, - * and caller must call thaw_processes() if something fails), - * but it cleans up leftover PF_FREEZE requests. - */ printk("\n"); printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", @@ -103,14 +98,11 @@ static int try_to_freeze_tasks(bool sig_only) elapsed_csecs / 100, elapsed_csecs % 100, todo - wq_busy, wq_busy); - thaw_workqueues(); - read_lock(&tasklist_lock); do_each_thread(g, p) { if (!wakeup && !freezer_should_skip(p) && freezing(p) && !frozen(p)) sched_show_task(p); - cancel_freezing(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); } else { @@ -123,6 +115,8 @@ static int try_to_freeze_tasks(bool sig_only) /** * freeze_processes - Signal user space processes to enter the refrigerator. + * + * On success, returns 0. On failure, -errno and system is fully thawed. */ int freeze_processes(void) { @@ -137,11 +131,15 @@ int freeze_processes(void) printk("\n"); BUG_ON(in_atomic()); + if (error) + thaw_processes(); return error; } /** * freeze_kernel_threads - Make freezable kernel threads go to the refrigerator. + * + * On success, returns 0. On failure, -errno and system is fully thawed. */ int freeze_kernel_threads(void) { @@ -155,6 +153,8 @@ int freeze_kernel_threads(void) printk("\n"); BUG_ON(in_atomic()); + if (error) + thaw_processes(); return error; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4953dc0..d336b27 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -106,13 +106,11 @@ static int suspend_prepare(void) goto Finish; error = suspend_freeze_processes(); - if (error) { - suspend_stats.failed_freeze++; - dpm_save_failed_step(SUSPEND_FREEZE); - } else + if (!error) return 0; - suspend_thaw_processes(); + suspend_stats.failed_freeze++; + dpm_save_failed_step(SUSPEND_FREEZE); usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); diff --git a/kernel/power/user.c b/kernel/power/user.c index 6d8f535..7cc3f5b 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -257,10 +257,8 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, break; error = freeze_processes(); - if (error) { - thaw_processes(); + if (error) usermodehelper_enable(); - } if (!error) data->frozen = 1; break; -- cgit v0.10.2 From 22b4e111fa01a1147aa562ceaf18a752a928ef4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: cgroup_freezer: prepare for removal of TIF_FREEZE TIF_FREEZE will be removed soon and freezing() will directly test whether any freezing condition is in effect. Make the following changes in preparation. * Rename cgroup_freezing_or_frozen() to cgroup_freezing() and make it return bool. * Make cgroup_freezing() access task_freezer() under rcu read lock instead of task_lock(). This makes the state dereferencing racy against task moving to another cgroup; however, it was already racy without this change as ->state dereference wasn't synchronized. This will be later dealt with using attach hooks. * freezer->state is now set before trying to push tasks into the target state. -v2: Oleg pointed out that freeze_change_state() was setting freeze->state incorrectly to CGROUP_FROZEN instead of CGROUP_FREEZING. Fixed. -v3: Matt pointed out that setting CGROUP_FROZEN used to always invoke try_to_freeze_cgroup() regardless of the current state. Patch updated such that the actual freeze/thaw operations are always performed on invocation. This shouldn't make any difference unless something is broken. Signed-off-by: Tejun Heo Acked-by: Paul Menage Cc: Li Zefan Cc: Oleg Nesterov diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 93f411a..b2b4abc 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -63,11 +63,11 @@ static inline bool try_to_freeze(void) extern bool freeze_task(struct task_struct *p, bool sig_only); #ifdef CONFIG_CGROUP_FREEZER -extern int cgroup_freezing_or_frozen(struct task_struct *task); +extern bool cgroup_freezing(struct task_struct *task); #else /* !CONFIG_CGROUP_FREEZER */ -static inline int cgroup_freezing_or_frozen(struct task_struct *task) +static inline bool cgroup_freezing(struct task_struct *task) { - return 0; + return false; } #endif /* !CONFIG_CGROUP_FREEZER */ diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index cd27b08..e6a1b8d 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -48,19 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task) struct freezer, css); } -static inline int __cgroup_freezing_or_frozen(struct task_struct *task) +bool cgroup_freezing(struct task_struct *task) { - enum freezer_state state = task_freezer(task)->state; - return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); -} + enum freezer_state state; + bool ret; -int cgroup_freezing_or_frozen(struct task_struct *task) -{ - int result; - task_lock(task); - result = __cgroup_freezing_or_frozen(task); - task_unlock(task); - return result; + rcu_read_lock(); + state = task_freezer(task)->state; + ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN; + rcu_read_unlock(); + + return ret; } /* @@ -102,9 +100,6 @@ struct cgroup_subsys freezer_subsys; * freezer_can_attach(): * cgroup_mutex (held by caller of can_attach) * - * cgroup_freezing_or_frozen(): - * task->alloc_lock (to get task's cgroup) - * * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): * freezer->lock * sighand->siglock (if the cgroup is freezing) @@ -177,13 +172,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss, static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) { - rcu_read_lock(); - if (__cgroup_freezing_or_frozen(tsk)) { - rcu_read_unlock(); - return -EBUSY; - } - rcu_read_unlock(); - return 0; + return cgroup_freezing(tsk) ? -EBUSY : 0; } static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) @@ -279,7 +268,6 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) struct task_struct *task; unsigned int num_cant_freeze_now = 0; - freezer->state = CGROUP_FREEZING; cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { if (!freeze_task(task, true)) @@ -303,8 +291,6 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) while ((task = cgroup_iter_next(cgroup, &it))) __thaw_task(task); cgroup_iter_end(cgroup, &it); - - freezer->state = CGROUP_THAWED; } static int freezer_change_state(struct cgroup *cgroup, @@ -318,20 +304,20 @@ static int freezer_change_state(struct cgroup *cgroup, spin_lock_irq(&freezer->lock); update_if_frozen(cgroup, freezer); - if (goal_state == freezer->state) - goto out; switch (goal_state) { case CGROUP_THAWED: + freezer->state = CGROUP_THAWED; unfreeze_cgroup(cgroup, freezer); break; case CGROUP_FROZEN: + freezer->state = CGROUP_FREEZING; retval = try_to_freeze_cgroup(cgroup, freezer); break; default: BUG(); } -out: + spin_unlock_irq(&freezer->lock); return retval; diff --git a/kernel/power/process.c b/kernel/power/process.c index ce64383..9f6f5c7 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -170,7 +170,7 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (cgroup_freezing_or_frozen(p)) + if (cgroup_freezing(p)) continue; __thaw_task(p); -- cgit v0.10.2 From a3201227f803ad7fd43180c5195dbe5a2bf998aa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE Using TIF_FREEZE for freezing worked when there was only single freezing condition (the PM one); however, now there is also the cgroup_freezer and single bit flag is getting clumsy. thaw_processes() is already testing whether cgroup freezing in in effect to avoid thawing tasks which were frozen by both PM and cgroup freezers. This is racy (nothing prevents race against cgroup freezing) and fragile. A much simpler way is to test actual freeze conditions from freezing() - ie. directly test whether PM or cgroup freezing is in effect. This patch adds variables to indicate whether and what type of freezing conditions are in effect and reimplements freezing() such that it directly tests whether any of the two freezing conditions is active and the task should freeze. On fast path, freezing() is still very cheap - it only tests system_freezing_cnt. This makes the clumsy dancing aroung TIF_FREEZE unnecessary and freeze/thaw operations more usual - updating state variables for the new state and nudging target tasks so that they notice the new state and comply. As long as the nudging happens after state update, it's race-free. * This allows use of freezing() in freeze_task(). Replace the open coded tests with freezing(). * p != current test is added to warning printing conditions in try_to_freeze_tasks() failure path. This is necessary as freezing() is now true for the task which initiated freezing too. -v2: Oleg pointed out that re-freezing FROZEN cgroup could increment system_freezing_cnt. Fixed. Signed-off-by: Tejun Heo Acked-by: Paul Menage (for the cgroup portions) diff --git a/include/linux/freezer.h b/include/linux/freezer.h index b2b4abc..8e29f2b 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -5,8 +5,13 @@ #include #include +#include #ifdef CONFIG_FREEZER +extern atomic_t system_freezing_cnt; /* nr of freezing conds in effect */ +extern bool pm_freezing; /* PM freezing in effect */ +extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ + /* * Check if a process has been frozen */ @@ -15,28 +20,16 @@ static inline int frozen(struct task_struct *p) return p->flags & PF_FROZEN; } -/* - * Check if there is a request to freeze a process - */ -static inline int freezing(struct task_struct *p) -{ - return test_tsk_thread_flag(p, TIF_FREEZE); -} +extern bool freezing_slow_path(struct task_struct *p); /* - * Request that a process be frozen - */ -static inline void set_freeze_flag(struct task_struct *p) -{ - set_tsk_thread_flag(p, TIF_FREEZE); -} - -/* - * Sometimes we may need to cancel the previous 'freeze' request + * Check if there is a request to freeze a process */ -static inline void clear_freeze_flag(struct task_struct *p) +static inline bool freezing(struct task_struct *p) { - clear_tsk_thread_flag(p, TIF_FREEZE); + if (likely(!atomic_read(&system_freezing_cnt))) + return false; + return freezing_slow_path(p); } static inline bool should_send_signal(struct task_struct *p) @@ -174,9 +167,7 @@ static inline void set_freezable_with_signal(void) }) #else /* !CONFIG_FREEZER */ static inline int frozen(struct task_struct *p) { return 0; } -static inline int freezing(struct task_struct *p) { return 0; } -static inline void set_freeze_flag(struct task_struct *p) {} -static inline void clear_freeze_flag(struct task_struct *p) {} +static inline bool freezing(struct task_struct *p) { return false; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e6a1b8d..2327ad1 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -145,7 +145,11 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, static void freezer_destroy(struct cgroup_subsys *ss, struct cgroup *cgroup) { - kfree(cgroup_freezer(cgroup)); + struct freezer *freezer = cgroup_freezer(cgroup); + + if (freezer->state != CGROUP_THAWED) + atomic_dec(&system_freezing_cnt); + kfree(freezer); } /* @@ -307,10 +311,14 @@ static int freezer_change_state(struct cgroup *cgroup, switch (goal_state) { case CGROUP_THAWED: + if (freezer->state != CGROUP_THAWED) + atomic_dec(&system_freezing_cnt); freezer->state = CGROUP_THAWED; unfreeze_cgroup(cgroup, freezer); break; case CGROUP_FROZEN: + if (freezer->state == CGROUP_THAWED) + atomic_inc(&system_freezing_cnt); freezer->state = CGROUP_FREEZING; retval = try_to_freeze_cgroup(cgroup, freezer); break; diff --git a/kernel/fork.c b/kernel/fork.c index ba0d172..d53316e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -997,7 +997,6 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) new_flags |= PF_FORKNOEXEC; new_flags |= PF_STARTING; p->flags = new_flags; - clear_freeze_flag(p); } SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) diff --git a/kernel/freezer.c b/kernel/freezer.c index 11e32d4..f53cd5a 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -11,9 +11,41 @@ #include #include +/* total number of freezing conditions in effect */ +atomic_t system_freezing_cnt = ATOMIC_INIT(0); +EXPORT_SYMBOL(system_freezing_cnt); + +/* indicate whether PM freezing is in effect, protected by pm_mutex */ +bool pm_freezing; +bool pm_nosig_freezing; + /* protects freezing and frozen transitions */ static DEFINE_SPINLOCK(freezer_lock); +/** + * freezing_slow_path - slow path for testing whether a task needs to be frozen + * @p: task to be tested + * + * This function is called by freezing() if system_freezing_cnt isn't zero + * and tests whether @p needs to enter and stay in frozen state. Can be + * called under any context. The freezers are responsible for ensuring the + * target tasks see the updated state. + */ +bool freezing_slow_path(struct task_struct *p) +{ + if (p->flags & PF_NOFREEZE) + return false; + + if (pm_nosig_freezing || cgroup_freezing(p)) + return true; + + if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG)) + return true; + + return false; +} +EXPORT_SYMBOL(freezing_slow_path); + /* Refrigerator is place where frozen processes are stored :-). */ bool __refrigerator(bool check_kthr_stop) { @@ -23,17 +55,11 @@ bool __refrigerator(bool check_kthr_stop) long save; /* - * Enter FROZEN. If NOFREEZE, schedule immediate thawing by - * clearing freezing. + * No point in checking freezing() again - the caller already did. + * Proceed to enter FROZEN. */ spin_lock_irq(&freezer_lock); repeat: - if (!freezing(current)) { - spin_unlock_irq(&freezer_lock); - return was_frozen; - } - if (current->flags & PF_NOFREEZE) - clear_freeze_flag(current); current->flags |= PF_FROZEN; spin_unlock_irq(&freezer_lock); @@ -99,18 +125,12 @@ static void fake_signal_wake_up(struct task_struct *p) bool freeze_task(struct task_struct *p, bool sig_only) { unsigned long flags; - bool ret = false; spin_lock_irqsave(&freezer_lock, flags); - - if ((p->flags & PF_NOFREEZE) || - (sig_only && !should_send_signal(p))) - goto out_unlock; - - if (frozen(p)) - goto out_unlock; - - set_freeze_flag(p); + if (!freezing(p) || frozen(p)) { + spin_unlock_irqrestore(&freezer_lock, flags); + return false; + } if (should_send_signal(p)) { fake_signal_wake_up(p); @@ -123,10 +143,9 @@ bool freeze_task(struct task_struct *p, bool sig_only) } else { wake_up_state(p, TASK_INTERRUPTIBLE); } - ret = true; -out_unlock: + spin_unlock_irqrestore(&freezer_lock, flags); - return ret; + return true; } void __thaw_task(struct task_struct *p) @@ -143,7 +162,6 @@ void __thaw_task(struct task_struct *p) * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); - clear_freeze_flag(p); if (frozen(p)) { wake_up_process(p); } else { diff --git a/kernel/power/process.c b/kernel/power/process.c index 9f6f5c7..0beb51e 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -101,7 +101,7 @@ static int try_to_freeze_tasks(bool sig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { if (!wakeup && !freezer_should_skip(p) && - freezing(p) && !frozen(p)) + p != current && freezing(p) && !frozen(p)) sched_show_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); @@ -122,7 +122,11 @@ int freeze_processes(void) { int error; + if (!pm_freezing) + atomic_inc(&system_freezing_cnt); + printk("Freezing user space processes ... "); + pm_freezing = true; error = try_to_freeze_tasks(true); if (!error) { printk("done."); @@ -146,6 +150,7 @@ int freeze_kernel_threads(void) int error; printk("Freezing remaining freezable tasks ... "); + pm_nosig_freezing = true; error = try_to_freeze_tasks(false); if (!error) printk("done."); @@ -162,6 +167,11 @@ void thaw_processes(void) { struct task_struct *g, *p; + if (pm_freezing) + atomic_dec(&system_freezing_cnt); + pm_freezing = false; + pm_nosig_freezing = false; + oom_killer_enable(); printk("Restarting tasks ... "); @@ -170,9 +180,6 @@ void thaw_processes(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (cgroup_freezing(p)) - continue; - __thaw_task(p); } while_each_thread(g, p); read_unlock(&tasklist_lock); -- cgit v0.10.2 From d88e4cb67197d007fb778d62fe17360e970d5bfa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: remove now unused TIF_FREEZE Signed-off-by: Tejun Heo Cc: Arnd Bergmann Cc: linux-arch@vger.kernel.org diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index ff73db0..28335bd 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -79,7 +79,6 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TIF_UAC_SIGBUS 12 /* ! userspace part of 'osf_sysinfo' */ #define TIF_MEMDIE 13 /* is terminating due to OOM killer */ #define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */ -#define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1< Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: remove should_send_signal() and update frozen() should_send_signal() is only used in freezer.c. Exporting them only increases chance of abuse. Open code the two users and remove it. Update frozen() to return bool. Signed-off-by: Tejun Heo diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 8e29f2b..3d50913 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -15,7 +15,7 @@ extern bool pm_nosig_freezing; /* PM nosig freezing in effect */ /* * Check if a process has been frozen */ -static inline int frozen(struct task_struct *p) +static inline bool frozen(struct task_struct *p) { return p->flags & PF_FROZEN; } @@ -32,11 +32,6 @@ static inline bool freezing(struct task_struct *p) return freezing_slow_path(p); } -static inline bool should_send_signal(struct task_struct *p) -{ - return !(p->flags & PF_FREEZER_NOSIG); -} - /* Takes and releases task alloc lock using task_lock() */ extern void __thaw_task(struct task_struct *t); @@ -166,7 +161,7 @@ static inline void set_freezable_with_signal(void) __retval; \ }) #else /* !CONFIG_FREEZER */ -static inline int frozen(struct task_struct *p) { return 0; } +static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } static inline bool __refrigerator(bool check_kthr_stop) { return false; } diff --git a/kernel/freezer.c b/kernel/freezer.c index f53cd5a..95a1238 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -132,7 +132,7 @@ bool freeze_task(struct task_struct *p, bool sig_only) return false; } - if (should_send_signal(p)) { + if (!(p->flags & PF_FREEZER_NOSIG)) { fake_signal_wake_up(p); /* * fake_signal_wake_up() goes through p's scheduler -- cgit v0.10.2 From 96ee6d8539c9fc6742908d85eb9723abb5c91854 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:25 -0800 Subject: freezer: fix set_freezable[_with_signal]() race A kthread doing set_freezable*() may race with on-going PM freeze and the freezer might think all tasks are frozen while the new freezable kthread is merrily proceeding to execute code paths which aren't supposed to be executing during PM freeze. Reimplement set_freezable[_with_signal]() using __set_freezable() such that freezable PF flags are modified under freezer_lock and try_to_freeze() is called afterwards. This eliminates race condition against freezing. Note: Separated out from larger patch to resolve fix order dependency Oleg pointed out. Signed-off-by: Tejun Heo Cc: Oleg Nesterov diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 3d50913..a0f1b3a 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -49,6 +49,7 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p, bool sig_only); +extern bool __set_freezable(bool with_signal); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); @@ -106,18 +107,18 @@ static inline int freezer_should_skip(struct task_struct *p) /* * Tell the freezer that the current task should be frozen by it */ -static inline void set_freezable(void) +static inline bool set_freezable(void) { - current->flags &= ~PF_NOFREEZE; + return __set_freezable(false); } /* * Tell the freezer that the current task should be frozen by it and that it * should send a fake signal to the task to freeze it. */ -static inline void set_freezable_with_signal(void) +static inline bool set_freezable_with_signal(void) { - current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG); + return __set_freezable(true); } /* diff --git a/kernel/freezer.c b/kernel/freezer.c index 95a1238..b1e7a7b 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -171,3 +171,28 @@ void __thaw_task(struct task_struct *p) } spin_unlock_irqrestore(&freezer_lock, flags); } + +/** + * __set_freezable - make %current freezable + * @with_signal: do we want %TIF_SIGPENDING for notification too? + * + * Mark %current freezable and enter refrigerator if necessary. + */ +bool __set_freezable(bool with_signal) +{ + might_sleep(); + + /* + * Modify flags while holding freezer_lock. This ensures the + * freezer notices that we aren't frozen yet or the freezing + * condition is visible to try_to_freeze() below. + */ + spin_lock_irq(&freezer_lock); + current->flags &= ~PF_NOFREEZE; + if (with_signal) + current->flags &= ~PF_FREEZER_NOSIG; + spin_unlock_irq(&freezer_lock); + + return try_to_freeze(); +} +EXPORT_SYMBOL(__set_freezable); -- cgit v0.10.2 From 5ece3eae4cdb968c269e0bc7e2c0e2b223552025 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: freezer: restructure __refrigerator() If another freeze happens before all tasks leave FROZEN state after being thawed, the freezer can see the existing FROZEN and consider the tasks to be frozen but they can clear FROZEN without checking the new freezing(). Oleg suggested restructuring __refrigerator() such that there's single condition check section inside freezer_lock and sigpending is cleared afterwards, which fixes the problem and simplifies the code. Restructure accordingly. -v2: Frozen loop exited without releasing freezer_lock. Fixed. Signed-off-by: Tejun Heo Reported-by: Oleg Nesterov Acked-by: Oleg Nesterov Cc: "Rafael J. Wysocki" diff --git a/kernel/freezer.c b/kernel/freezer.c index b1e7a7b..c3496c6 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -52,39 +52,29 @@ bool __refrigerator(bool check_kthr_stop) /* Hmm, should we be allowed to suspend when there are realtime processes around? */ bool was_frozen = false; - long save; + long save = current->state; - /* - * No point in checking freezing() again - the caller already did. - * Proceed to enter FROZEN. - */ - spin_lock_irq(&freezer_lock); -repeat: - current->flags |= PF_FROZEN; - spin_unlock_irq(&freezer_lock); - - save = current->state; pr_debug("%s entered refrigerator\n", current->comm); - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); /* We sent fake signal, clean it up */ - spin_unlock_irq(¤t->sighand->siglock); - for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); + + spin_lock_irq(&freezer_lock); + current->flags |= PF_FROZEN; if (!freezing(current) || (check_kthr_stop && kthread_should_stop())) + current->flags &= ~PF_FROZEN; + spin_unlock_irq(&freezer_lock); + + if (!(current->flags & PF_FROZEN)) break; was_frozen = true; schedule(); } - /* leave FROZEN */ - spin_lock_irq(&freezer_lock); - if (freezing(current)) - goto repeat; - current->flags &= ~PF_FROZEN; - spin_unlock_irq(&freezer_lock); + spin_lock_irq(¤t->sighand->siglock); + recalc_sigpending(); /* We sent fake signal, clean it up */ + spin_unlock_irq(¤t->sighand->siglock); pr_debug("%s left refrigerator\n", current->comm); -- cgit v0.10.2 From 37ad8aca94a1da2112a7c56151390914e80d1113 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: freezer: use lock_task_sighand() in fake_signal_wake_up() cgroup_freezer calls freeze_task() without holding tasklist_lock and, if the task is exiting, its ->sighand may be gone by the time fake_signal_wake_up() is called. Use lock_task_sighand() instead of accessing ->sighand directly. Signed-off-by: Tejun Heo Reported-by: Oleg Nesterov Acked-by: Oleg Nesterov Cc: "Rafael J. Wysocki" Cc: Paul Menage diff --git a/kernel/freezer.c b/kernel/freezer.c index c3496c6..389549f 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -93,9 +93,10 @@ static void fake_signal_wake_up(struct task_struct *p) { unsigned long flags; - spin_lock_irqsave(&p->sighand->siglock, flags); - signal_wake_up(p, 0); - spin_unlock_irqrestore(&p->sighand->siglock, flags); + if (lock_task_sighand(p, &flags)) { + signal_wake_up(p, 0); + unlock_task_sighand(p, &flags); + } } /** -- cgit v0.10.2 From 839e3407d90a810318d17c17ceb3d5928a910704 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: freezer: remove unused @sig_only from freeze_task() After "freezer: make freezing() test freeze conditions in effect instead of TIF_FREEZE", freezing() returns authoritative answer on whether the current task should freeze or not and freeze_task() doesn't need or use @sig_only. Remove it. While at it, rewrite function comment for freeze_task() and rename @sig_only to @user_only in try_to_freeze_tasks(). This patch doesn't cause any functional change. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a0f1b3a..a28842e 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -48,7 +48,7 @@ static inline bool try_to_freeze(void) return __refrigerator(false); } -extern bool freeze_task(struct task_struct *p, bool sig_only); +extern bool freeze_task(struct task_struct *p); extern bool __set_freezable(bool with_signal); #ifdef CONFIG_CGROUP_FREEZER diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 2327ad1..e411a60 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -206,7 +206,7 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) /* Locking avoids race with FREEZING -> THAWED transitions. */ if (freezer->state == CGROUP_FREEZING) - freeze_task(task, true); + freeze_task(task); spin_unlock_irq(&freezer->lock); } @@ -274,7 +274,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) cgroup_iter_start(cgroup, &it); while ((task = cgroup_iter_next(cgroup, &it))) { - if (!freeze_task(task, true)) + if (!freeze_task(task)) continue; if (frozen(task)) continue; diff --git a/kernel/freezer.c b/kernel/freezer.c index 389549f..2589a61 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -100,20 +100,17 @@ static void fake_signal_wake_up(struct task_struct *p) } /** - * freeze_task - send a freeze request to given task - * @p: task to send the request to - * @sig_only: if set, the request will only be sent if the task has the - * PF_FREEZER_NOSIG flag unset - * Return value: 'false', if @sig_only is set and the task has - * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise + * freeze_task - send a freeze request to given task + * @p: task to send the request to * - * The freeze request is sent by setting the tasks's TIF_FREEZE flag and - * either sending a fake signal to it or waking it up, depending on whether - * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task - * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its - * TIF_FREEZE flag will not be set. + * If @p is freezing, the freeze request is sent by setting %TIF_FREEZE + * flag and either sending a fake signal to it or waking it up, depending + * on whether it has %PF_FREEZER_NOSIG set. + * + * RETURNS: + * %false, if @p is not freezing or already frozen; %true, otherwise */ -bool freeze_task(struct task_struct *p, bool sig_only) +bool freeze_task(struct task_struct *p) { unsigned long flags; diff --git a/kernel/power/process.c b/kernel/power/process.c index 0beb51e..77274c9 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,7 @@ */ #define TIMEOUT (20 * HZ) -static int try_to_freeze_tasks(bool sig_only) +static int try_to_freeze_tasks(bool user_only) { struct task_struct *g, *p; unsigned long end_time; @@ -37,14 +37,14 @@ static int try_to_freeze_tasks(bool sig_only) end_time = jiffies + TIMEOUT; - if (!sig_only) + if (!user_only) freeze_workqueues_begin(); while (true) { todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (p == current || !freeze_task(p, sig_only)) + if (p == current || !freeze_task(p)) continue; /* @@ -65,7 +65,7 @@ static int try_to_freeze_tasks(bool sig_only) } while_each_thread(g, p); read_unlock(&tasklist_lock); - if (!sig_only) { + if (!user_only) { wq_busy = freeze_workqueues_busy(); todo += wq_busy; } -- cgit v0.10.2 From ec012476af73a1a8a82565a915e9b48c2e337878 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Nov 2011 12:32:26 -0800 Subject: usb_storage: don't use set_freezable_with_signal() The current implementation of set_freezable_with_signal() is buggy and tricky to get right. usb-storage is the only user and its use can be avoided trivially. All usb-storage wants is to be able to sleep with timeout and get woken up if freezing() becomes true. This can be trivially implemented by doing interruptible wait w/ freezing() included in the wait condition. There's no reason to use set_freezable_with_signal(). Perform interruptible wait on freezing() instead of using set_freezable_with_signal(), which is scheduled for removal. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: "Rafael J. Wysocki" Cc: Seth Forshee Cc: Alan Stern Cc: Greg Kroah-Hartman diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index c325e69..aa84b3d 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -831,7 +831,8 @@ static int usb_stor_scan_thread(void * __us) dev_dbg(dev, "device found\n"); - set_freezable_with_signal(); + set_freezable(); + /* * Wait for the timeout to expire or for a disconnect * @@ -839,16 +840,16 @@ static int usb_stor_scan_thread(void * __us) * fail to freeze, but we can't be non-freezable either. Nor can * khubd freeze while waiting for scanning to complete as it may * hold the device lock, causing a hang when suspending devices. - * So we request a fake signal when freezing and use - * interruptible sleep to kick us out of our wait early when - * freezing happens. + * So instead of using wait_event_freezable(), explicitly test + * for (DONT_SCAN || freezing) in interruptible wait and proceed + * if any of DONT_SCAN, freezing or timeout has happened. */ if (delay_use > 0) { dev_dbg(dev, "waiting for device to settle " "before scanning\n"); wait_event_interruptible_timeout(us->delay_wait, - test_bit(US_FLIDX_DONT_SCAN, &us->dflags), - delay_use * HZ); + test_bit(US_FLIDX_DONT_SCAN, &us->dflags) || + freezing(current), delay_use * HZ); } /* If the device is still connected, perform the scanning */ -- cgit v0.10.2 From adfa543e7314b36ac55a40019977de6e47946dd7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Nov 2011 09:28:16 -0800 Subject: dmatest: don't use set_freezable_with_signal() Commit 981ed70d8e (dmatest: make dmatest threads freezable) made dmatest kthread use set_freezable_with_signal(); however, the interface is scheduled to be removed in the next merge window. The problem is that unlike userland tasks there's no default place which handles signal pending state and it isn't clear who owns and/or is responsible for clearing TIF_SIGPENDING. For example, in the current code, try_to_freeze() clears TIF_SIGPENDING but it isn't sure whether it actually owns the TIF_SIGPENDING nor is it race-free - ie. the task may continue to run with TIF_SIGPENDING set after the freezable section. Unfortunately, we don't have wait_for_completion_freezable_timeout(). This patch open codes it and uses wait_event_freezable_timeout() instead and removes timeout reloading - wait_event_freezable_timeout() won't return across freezing events (currently racy but fix scheduled) and timer doesn't decrement while the task is in freezer. Although this does lose timer-reset-over-freezing, given that timeout is supposed to be long enough and failure to finish inside is considered irrecoverable, I don't think this is worth the complexity. While at it, move completion to outer scope and explain that we're ignoring dangling pointer problem after timeout. This should give slightly better chance at avoiding oops after timeout. Signed-off-by: Tejun Heo Acked-by: Dan Williams Cc: Guennadi Liakhovetski Cc: Nicolas Ferre diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index eb1d864..2b8661b 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -214,9 +214,18 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start, return error_count; } -static void dmatest_callback(void *completion) +/* poor man's completion - we want to use wait_event_freezable() on it */ +struct dmatest_done { + bool done; + wait_queue_head_t *wait; +}; + +static void dmatest_callback(void *arg) { - complete(completion); + struct dmatest_done *done = arg; + + done->done = true; + wake_up_all(done->wait); } /* @@ -235,7 +244,9 @@ static void dmatest_callback(void *completion) */ static int dmatest_func(void *data) { + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait); struct dmatest_thread *thread = data; + struct dmatest_done done = { .wait = &done_wait }; struct dma_chan *chan; const char *thread_name; unsigned int src_off, dst_off, len; @@ -252,7 +263,7 @@ static int dmatest_func(void *data) int i; thread_name = current->comm; - set_freezable_with_signal(); + set_freezable(); ret = -ENOMEM; @@ -306,9 +317,6 @@ static int dmatest_func(void *data) struct dma_async_tx_descriptor *tx = NULL; dma_addr_t dma_srcs[src_cnt]; dma_addr_t dma_dsts[dst_cnt]; - struct completion cmp; - unsigned long start, tmo, end = 0 /* compiler... */; - bool reload = true; u8 align = 0; total_tests++; @@ -391,9 +399,9 @@ static int dmatest_func(void *data) continue; } - init_completion(&cmp); + done.done = false; tx->callback = dmatest_callback; - tx->callback_param = &cmp; + tx->callback_param = &done; cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { @@ -407,20 +415,20 @@ static int dmatest_func(void *data) } dma_async_issue_pending(chan); - do { - start = jiffies; - if (reload) - end = start + msecs_to_jiffies(timeout); - else if (end <= start) - end = start + 1; - tmo = wait_for_completion_interruptible_timeout(&cmp, - end - start); - reload = try_to_freeze(); - } while (tmo == -ERESTARTSYS); + wait_event_freezable_timeout(done_wait, done.done, + msecs_to_jiffies(timeout)); status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); - if (tmo == 0) { + if (!done.done) { + /* + * We're leaving the timed out dma operation with + * dangling pointer to done_wait. To make this + * correct, we'll need to allocate wait_done for + * each test iteration and perform "who's gonna + * free it this time?" dancing. For now, just + * leave it dangling. + */ pr_warning("%s: #%u: test timed out\n", thread_name, total_tests - 1); failed_tests++; -- cgit v0.10.2 From 34b087e48367c252e343c2f8de65676a78af1e4a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Nov 2011 09:28:17 -0800 Subject: freezer: kill unused set_freezable_with_signal() There's no in-kernel user of set_freezable_with_signal() left. Mixing TIF_SIGPENDING with kernel threads can lead to nasty corner cases as kernel threads never travel signal delivery path on their own. e.g. the current implementation is buggy in the cancelation path of __thaw_task(). It calls recalc_sigpending_and_wake() in an attempt to clear TIF_SIGPENDING but the function never clears it regardless of sigpending state. This means that signallable freezable kthreads may continue executing with !freezing() && stuck TIF_SIGPENDING, which can be troublesome. This patch removes set_freezable_with_signal() along with PF_FREEZER_NOSIG and recalc_sigpending*() calls in freezer. User tasks get TIF_SIGPENDING, kernel tasks get woken up and the spurious sigpending is dealt with in the usual signal delivery path. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a28842e..a33550f 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -49,7 +49,7 @@ static inline bool try_to_freeze(void) } extern bool freeze_task(struct task_struct *p); -extern bool __set_freezable(bool with_signal); +extern bool set_freezable(void); #ifdef CONFIG_CGROUP_FREEZER extern bool cgroup_freezing(struct task_struct *task); @@ -105,23 +105,6 @@ static inline int freezer_should_skip(struct task_struct *p) } /* - * Tell the freezer that the current task should be frozen by it - */ -static inline bool set_freezable(void) -{ - return __set_freezable(false); -} - -/* - * Tell the freezer that the current task should be frozen by it and that it - * should send a fake signal to the task to freeze it. - */ -static inline bool set_freezable_with_signal(void) -{ - return __set_freezable(true); -} - -/* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally * defined in @@ -176,7 +159,6 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} -static inline void set_freezable_with_signal(void) {} #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/include/linux/sched.h b/include/linux/sched.h index d12bd03..2f90470 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1788,7 +1788,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ -#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/kernel/freezer.c b/kernel/freezer.c index 2589a61..9815b8d 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -39,7 +39,7 @@ bool freezing_slow_path(struct task_struct *p) if (pm_nosig_freezing || cgroup_freezing(p)) return true; - if (pm_freezing && !(p->flags & PF_FREEZER_NOSIG)) + if (pm_freezing && !(p->flags & PF_KTHREAD)) return true; return false; @@ -72,10 +72,6 @@ bool __refrigerator(bool check_kthr_stop) schedule(); } - spin_lock_irq(¤t->sighand->siglock); - recalc_sigpending(); /* We sent fake signal, clean it up */ - spin_unlock_irq(¤t->sighand->siglock); - pr_debug("%s left refrigerator\n", current->comm); /* @@ -120,7 +116,7 @@ bool freeze_task(struct task_struct *p) return false; } - if (!(p->flags & PF_FREEZER_NOSIG)) { + if (!(p->flags & PF_KTHREAD)) { fake_signal_wake_up(p); /* * fake_signal_wake_up() goes through p's scheduler @@ -145,28 +141,19 @@ void __thaw_task(struct task_struct *p) * be visible to @p as waking up implies wmb. Waking up inside * freezer_lock also prevents wakeups from leaking outside * refrigerator. - * - * If !FROZEN, @p hasn't reached refrigerator, recalc sigpending to - * avoid leaving dangling TIF_SIGPENDING behind. */ spin_lock_irqsave(&freezer_lock, flags); - if (frozen(p)) { + if (frozen(p)) wake_up_process(p); - } else { - spin_lock(&p->sighand->siglock); - recalc_sigpending_and_wake(p); - spin_unlock(&p->sighand->siglock); - } spin_unlock_irqrestore(&freezer_lock, flags); } /** - * __set_freezable - make %current freezable - * @with_signal: do we want %TIF_SIGPENDING for notification too? + * set_freezable - make %current freezable * * Mark %current freezable and enter refrigerator if necessary. */ -bool __set_freezable(bool with_signal) +bool set_freezable(void) { might_sleep(); @@ -177,10 +164,8 @@ bool __set_freezable(bool with_signal) */ spin_lock_irq(&freezer_lock); current->flags &= ~PF_NOFREEZE; - if (with_signal) - current->flags &= ~PF_FREEZER_NOSIG; spin_unlock_irq(&freezer_lock); return try_to_freeze(); } -EXPORT_SYMBOL(__set_freezable); +EXPORT_SYMBOL(set_freezable); diff --git a/kernel/kthread.c b/kernel/kthread.c index 1c36dea..3d3de63 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -282,7 +282,7 @@ int kthreadd(void *unused) set_cpus_allowed_ptr(tsk, cpu_all_mask); set_mems_allowed(node_states[N_HIGH_MEMORY]); - current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; + current->flags |= PF_NOFREEZE; for (;;) { set_current_state(TASK_INTERRUPTIBLE); -- cgit v0.10.2 From 24b7ead3fb0bae267c2ee50898eb4c13aedd1e9f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 23 Nov 2011 09:28:17 -0800 Subject: freezer: fix wait_event_freezable/__thaw_task races wait_event_freezable() and friends stop the waiting if try_to_freeze() fails. This is not right, we can race with __thaw_task() and in this case - wait_event_freezable() returns the wrong ERESTARTSYS - wait_event_freezable_timeout() can return the positive value while condition == F Change the code to always check __retval/condition before return. Note: with or without this patch the timeout logic looks strange, probably we should recalc timeout if try_to_freeze() returns T. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo diff --git a/include/linux/freezer.h b/include/linux/freezer.h index a33550f..09570ac 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -122,28 +122,30 @@ static inline int freezer_should_skip(struct task_struct *p) #define wait_event_freezable(wq, condition) \ ({ \ int __retval; \ - do { \ + for (;;) { \ __retval = wait_event_interruptible(wq, \ (condition) || freezing(current)); \ - if (__retval && !freezing(current)) \ + if (__retval || (condition)) \ break; \ - else if (!(condition)) \ - __retval = -ERESTARTSYS; \ - } while (try_to_freeze()); \ + try_to_freeze(); \ + } \ __retval; \ }) - #define wait_event_freezable_timeout(wq, condition, timeout) \ ({ \ long __retval = timeout; \ - do { \ + for (;;) { \ __retval = wait_event_interruptible_timeout(wq, \ (condition) || freezing(current), \ __retval); \ - } while (try_to_freeze()); \ + if (__retval <= 0 || (condition)) \ + break; \ + try_to_freeze(); \ + } \ __retval; \ }) + #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } -- cgit v0.10.2 From 3f19f08a7ec74cfc50fbad3c5e615760afbd23a0 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 15 Nov 2011 21:59:09 +0100 Subject: PM / devfreq: separate error paths from successful path I think this change improves code readability. Signed-off-by: Axel Lin Acked-by: MyungJoo Ham Signed-off-by: Rafael J. Wysocki diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 59d24e9..c189b82 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -347,7 +347,7 @@ struct devfreq *devfreq_add_device(struct device *dev, if (!IS_ERR(devfreq)) { dev_err(dev, "%s: Unable to create devfreq for the device. It already has one.\n", __func__); err = -EINVAL; - goto out; + goto err_out; } } @@ -356,7 +356,7 @@ struct devfreq *devfreq_add_device(struct device *dev, dev_err(dev, "%s: Unable to create devfreq for the device\n", __func__); err = -ENOMEM; - goto out; + goto err_out; } mutex_init(&devfreq->lock); @@ -399,17 +399,16 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->next_polling); } mutex_unlock(&devfreq_list_lock); - goto out; +out: + return devfreq; + err_init: device_unregister(&devfreq->dev); err_dev: mutex_unlock(&devfreq->lock); kfree(devfreq); -out: - if (err) - return ERR_PTR(err); - else - return devfreq; +err_out: + return ERR_PTR(err); } /** -- cgit v0.10.2 From 341d4166175e9b7911444f5a33b1c9efb8f15c85 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Sat, 19 Nov 2011 14:39:01 +0100 Subject: PM: Fix indentation and remove extraneous whitespaces in kernel/power/main.c Lack of proper indentation of the goto statement decreases the readability of code significantly. In fact, this made me look twice at the code to check whether it really does what it should be doing. Fix this. And in the same file, there are some extra whitespaces. Get rid of them too. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/main.c b/kernel/power/main.c index 36e0f09..7d36fb3 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -3,7 +3,7 @@ * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab - * + * * This file is released under the GPLv2 * */ @@ -240,7 +240,7 @@ struct kobject *power_kobj; * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and * 'disk' (Suspend-to-Disk). * - * store() accepts one of those strings, translates it into the + * store() accepts one of those strings, translates it into the * proper enumerated value, and initiates a suspend transition. */ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, @@ -282,7 +282,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, /* First, check if we are requested to hibernate */ if (len == 4 && !strncmp(buf, "disk", len)) { error = hibernate(); - goto Exit; + goto Exit; } #ifdef CONFIG_SUSPEND -- cgit v0.10.2 From 6a76b7a9cc93dec6ae58d70f1257d234291908e0 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Mon, 21 Nov 2011 23:32:35 +0100 Subject: PM / Memory-hotplug: Avoid task freezing failures The lock_system_sleep() function is used in the memory hotplug code at several places in order to implement mutual exclusion with hibernation. However, this function tries to acquire the 'pm_mutex' lock using mutex_lock() and hence blocks in TASK_UNINTERRUPTIBLE state if it doesn't get the lock. This would lead to task freezing failures and hence hibernation failure as a consequence, even though the hibernation call path successfully acquired the lock. But it is to be noted that, since this task tries to acquire pm_mutex, if it blocks due to this, we are *100% sure* that this task is not going to run as long as hibernation sequence is in progress, since hibernation releases 'pm_mutex' only at the very end, when everything is done. And this means, this task is going to be anyway blocked for much more longer than what the freezer intends to achieve; which means, freezing and thawing doesn't really make any difference to this task! So, to fix freezing failures, we just ask the freezer to skip freezing this task, since it is already "frozen enough". But instead of calling freezer_do_not_count() and freezer_count() as it is, we use only the relevant parts of those functions, because restrictions such as 'the task should be a userspace one' etc., might not be relevant in this scenario. Signed-off-by: Srivatsa S. Bhat Acked-by: Tejun Heo Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 57a6924..1f7fff4 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -380,12 +380,16 @@ static inline void unlock_system_sleep(void) {} static inline void lock_system_sleep(void) { + /* simplified freezer_do_not_count() */ + current->flags |= PF_FREEZER_SKIP; mutex_lock(&pm_mutex); } static inline void unlock_system_sleep(void) { mutex_unlock(&pm_mutex); + /* simplified freezer_count() */ + current->flags &= ~PF_FREEZER_SKIP; } #endif -- cgit v0.10.2 From d74e278aaf3b0fe4b02af67055aa71babcc0cebe Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 21 Nov 2011 23:33:28 +0100 Subject: PM / Sleep: Remove unnecessary label and jumps to it form PM core code The "End" label in device_prepare() in drivers/base/power/main.c is not necessary and the jumps to it have no real effect, so remove them all. Signed-off-by: Rafael J. Wysocki Reviewed-by: Srivatsa S. Bhat diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c3d2dfc..1172aea 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1033,22 +1033,16 @@ static int device_prepare(struct device *dev, pm_message_t state) if (dev->pm_domain->ops.prepare) error = dev->pm_domain->ops.prepare(dev); suspend_report_result(dev->pm_domain->ops.prepare, error); - if (error) - goto End; } else if (dev->type && dev->type->pm) { pm_dev_dbg(dev, state, "preparing type "); if (dev->type->pm->prepare) error = dev->type->pm->prepare(dev); suspend_report_result(dev->type->pm->prepare, error); - if (error) - goto End; } else if (dev->class && dev->class->pm) { pm_dev_dbg(dev, state, "preparing class "); if (dev->class->pm->prepare) error = dev->class->pm->prepare(dev); suspend_report_result(dev->class->pm->prepare, error); - if (error) - goto End; } else if (dev->bus && dev->bus->pm) { pm_dev_dbg(dev, state, "preparing "); if (dev->bus->pm->prepare) @@ -1056,7 +1050,6 @@ static int device_prepare(struct device *dev, pm_message_t state) suspend_report_result(dev->bus->pm->prepare, error); } - End: device_unlock(dev); return error; -- cgit v0.10.2 From 64e94aafb6a5c4f419e9b8f93950914b5ac162a9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 21 Nov 2011 23:33:55 +0100 Subject: PM / Sleep: Simplify device_suspend_noirq() Remove a few if () and return statements in device_suspend_noirq() that aren't really necessary. Signed-off-by: Rafael J. Wysocki Reviewed-by: Srivatsa S. Bhat diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1172aea..406f82c 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -763,31 +763,23 @@ static pm_message_t resume_event(pm_message_t sleep_state) */ static int device_suspend_noirq(struct device *dev, pm_message_t state) { - int error; + int error = 0; if (dev->pm_domain) { pm_dev_dbg(dev, state, "LATE power domain "); error = pm_noirq_op(dev, &dev->pm_domain->ops, state); - if (error) - return error; } else if (dev->type && dev->type->pm) { pm_dev_dbg(dev, state, "LATE type "); error = pm_noirq_op(dev, dev->type->pm, state); - if (error) - return error; } else if (dev->class && dev->class->pm) { pm_dev_dbg(dev, state, "LATE class "); error = pm_noirq_op(dev, dev->class->pm, state); - if (error) - return error; } else if (dev->bus && dev->bus->pm) { pm_dev_dbg(dev, state, "LATE "); error = pm_noirq_op(dev, dev->bus->pm, state); - if (error) - return error; } - return 0; + return error; } /** -- cgit v0.10.2 From 953a206393b1533ceb0e7d725cc5a8c8d7ed97dd Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 22 Nov 2011 23:20:31 +0100 Subject: PM / Hibernate: Refactor and simplify hibernation_snapshot() code The goto statements in hibernation_snapshot() are a bit complex. Refactor the code to remove some of them, thereby simplifying the implementation. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index a6b0503..ebf62c3 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -333,7 +333,7 @@ static int create_image(int platform_mode) */ int hibernation_snapshot(int platform_mode) { - pm_message_t msg = PMSG_RECOVER; + pm_message_t msg; int error; error = platform_begin(platform_mode); @@ -362,26 +362,26 @@ int hibernation_snapshot(int platform_mode) error = dpm_prepare(PMSG_FREEZE); if (error) { - dpm_complete(msg); + dpm_complete(PMSG_RECOVER); goto Cleanup; } suspend_console(); pm_restrict_gfp_mask(); + error = dpm_suspend(PMSG_FREEZE); - if (error) - goto Recover_platform; - if (hibernation_test(TEST_DEVICES)) - goto Recover_platform; + if (error || hibernation_test(TEST_DEVICES)) + platform_recover(platform_mode); + else + error = create_image(platform_mode); - error = create_image(platform_mode); /* - * Control returns here (1) after the image has been created or the + * In the case that we call create_image() above, the control + * returns here (1) after the image has been created or the * image creation has failed and (2) after a successful restore. */ - Resume_devices: /* We may need to release the preallocated image pages here. */ if (error || !in_suspend) swsusp_free(); @@ -399,10 +399,6 @@ int hibernation_snapshot(int platform_mode) platform_end(platform_mode); return error; - Recover_platform: - platform_recover(platform_mode); - goto Resume_devices; - Cleanup: swsusp_free(); goto Close; -- cgit v0.10.2 From 5307427a31c50041cc4d18c49e9cce1a9303ea04 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 15 Nov 2011 21:59:21 +0100 Subject: PM / Usermodehelper: Cleanup remnants of usermodehelper_pm_callback() usermodehelper_pm_callback() no longer exists in the kernel. There are 2 comments in kernel/kmod.c that still refer to it. Also, the patch that introduced usermodehelper_pm_callback(), #included two header files: and . But these are no longer necessary. This patch updates the comments as appropriate and removes the unnecessary header file inclusions. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/kernel/kmod.c b/kernel/kmod.c index a4bea97..2142687 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -34,8 +34,6 @@ #include #include #include -#include -#include #include #include @@ -282,14 +280,14 @@ static int usermodehelper_disabled = 1; static atomic_t running_helpers = ATOMIC_INIT(0); /* - * Wait queue head used by usermodehelper_pm_callback() to wait for all running + * Wait queue head used by usermodehelper_disable() to wait for all running * helpers to finish. */ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); /* * Time to wait for running_helpers to become zero before the setting of - * usermodehelper_disabled in usermodehelper_pm_callback() fails + * usermodehelper_disabled in usermodehelper_disable() fails */ #define RUNNING_HELPERS_TIMEOUT (5 * HZ) -- cgit v0.10.2 From d7268a31c8aabc5a29ccc7f76de84383e5f38737 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 15 Nov 2011 21:59:31 +0100 Subject: CPU: Add right qualifiers for alloc_frozen_cpus() and cpu_hotplug_pm_sync_init() Add __init for functions alloc_frozen_cpus() and cpu_hotplug_pm_sync_init() because they are only called during boot time. Add static for function cpu_hotplug_pm_sync_init() because its scope is limited in this file only. Signed-off-by: Fenghua Yu Acked-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/kernel/cpu.c b/kernel/cpu.c index 563f136..cf915b8 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -470,7 +470,7 @@ out: cpu_maps_update_done(); } -static int alloc_frozen_cpus(void) +static int __init alloc_frozen_cpus(void) { if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO)) return -ENOMEM; @@ -543,7 +543,7 @@ cpu_hotplug_pm_callback(struct notifier_block *nb, } -int cpu_hotplug_pm_sync_init(void) +static int __init cpu_hotplug_pm_sync_init(void) { pm_notifier(cpu_hotplug_pm_callback, 0); return 0; -- cgit v0.10.2 From 62c9ea6b120688d800b4d892eaf737c20a73e86b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 25 Nov 2011 00:44:55 +0100 Subject: Freezer: fix more fallout from the thaw_process rename Commit 944e192db53c "freezer: rename thaw_process() to __thaw_task() and simplify the implementation" did not create a !CONFIG_FREEZER version of __thaw_task(). Signed-off-by: Stephen Rothwell Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 09570ac..c1ee283 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -149,6 +149,7 @@ static inline int freezer_should_skip(struct task_struct *p) #else /* !CONFIG_FREEZER */ static inline bool frozen(struct task_struct *p) { return false; } static inline bool freezing(struct task_struct *p) { return false; } +static inline void __thaw_task(struct task_struct *t) {} static inline bool __refrigerator(bool check_kthr_stop) { return false; } static inline int freeze_processes(void) { return -ENOSYS; } -- cgit v0.10.2 From 00dc9ad18d707f36b2fb4af98fd2cf0548d2b258 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:01:31 +0100 Subject: PM / Runtime: Use device PM QoS constraints (v2) Make the runtime PM core use device PM QoS constraints to check if it is allowed to suspend a given device, so that an error code is returned if the device's own PM QoS constraint is negative or one of its children has already been suspended for too long. If this is not the case, the maximum estimated time the device is allowed to be suspended, computed as the minimum of the device's PM QoS constraint and the PM QoS constraints of its children (reduced by the difference between the current time and their suspend times) is stored in a new device's PM field power.max_time_suspended_ns that can be used by the device's subsystem or PM domain to decide whether or not to put the device into lower-power (and presumably higher-latency) states later (if the constraint is 0, which means "no constraint", the power.max_time_suspended_ns is set to -1). Additionally, the time of execution of the subsystem-level .runtime_suspend() callback for the device is recorded in the new power.suspend_time field for later use by the device's subsystem or PM domain along with power.max_time_suspended_ns (it also is used by the core code when the device's parent is suspended). Introduce a new helper function, pm_runtime_update_max_time_suspended(), allowing subsystems and PM domains (or device drivers) to update the power.max_time_suspended_ns field, for example after changing the power state of a suspended device. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 86de6c5..03f4bd0 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -47,21 +47,29 @@ static DEFINE_MUTEX(dev_pm_qos_mtx); static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); /** - * dev_pm_qos_read_value - Get PM QoS constraint for a given device. + * __dev_pm_qos_read_value - Get PM QoS constraint for a given device. + * @dev: Device to get the PM QoS constraint value for. + * + * This routine must be called with dev->power.lock held. + */ +s32 __dev_pm_qos_read_value(struct device *dev) +{ + struct pm_qos_constraints *c = dev->power.constraints; + + return c ? pm_qos_read_value(c) : 0; +} + +/** + * dev_pm_qos_read_value - Get PM QoS constraint for a given device (locked). * @dev: Device to get the PM QoS constraint value for. */ s32 dev_pm_qos_read_value(struct device *dev) { - struct pm_qos_constraints *c; unsigned long flags; - s32 ret = 0; + s32 ret; spin_lock_irqsave(&dev->power.lock, flags); - - c = dev->power.constraints; - if (c) - ret = pm_qos_read_value(c); - + ret = __dev_pm_qos_read_value(dev); spin_unlock_irqrestore(&dev->power.lock, flags); return ret; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 8c78443..068f7ed 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -279,6 +279,47 @@ static int rpm_callback(int (*cb)(struct device *), struct device *dev) return retval != -EACCES ? retval : -EIO; } +struct rpm_qos_data { + ktime_t time_now; + s64 constraint_ns; +}; + +/** + * rpm_update_qos_constraint - Update a given PM QoS constraint data. + * @dev: Device whose timing data to use. + * @data: PM QoS constraint data to update. + * + * Use the suspend timing data of @dev to update PM QoS constraint data pointed + * to by @data. + */ +static int rpm_update_qos_constraint(struct device *dev, void *data) +{ + struct rpm_qos_data *qos = data; + unsigned long flags; + s64 delta_ns; + int ret = 0; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (dev->power.max_time_suspended_ns < 0) + goto out; + + delta_ns = dev->power.max_time_suspended_ns - + ktime_to_ns(ktime_sub(qos->time_now, dev->power.suspend_time)); + if (delta_ns <= 0) { + ret = -EBUSY; + goto out; + } + + if (qos->constraint_ns > delta_ns || qos->constraint_ns == 0) + qos->constraint_ns = delta_ns; + + out: + spin_unlock_irqrestore(&dev->power.lock, flags); + + return ret; +} + /** * rpm_suspend - Carry out runtime suspend of given device. * @dev: Device to suspend. @@ -305,6 +346,7 @@ static int rpm_suspend(struct device *dev, int rpmflags) { int (*callback)(struct device *); struct device *parent = NULL; + struct rpm_qos_data qos; int retval; trace_rpm_suspend(dev, rpmflags); @@ -400,8 +442,38 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } + qos.constraint_ns = __dev_pm_qos_read_value(dev); + if (qos.constraint_ns < 0) { + /* Negative constraint means "never suspend". */ + retval = -EPERM; + goto out; + } + qos.constraint_ns *= NSEC_PER_USEC; + qos.time_now = ktime_get(); + __update_runtime_status(dev, RPM_SUSPENDING); + if (!dev->power.ignore_children) { + if (dev->power.irq_safe) + spin_unlock(&dev->power.lock); + else + spin_unlock_irq(&dev->power.lock); + + retval = device_for_each_child(dev, &qos, + rpm_update_qos_constraint); + + if (dev->power.irq_safe) + spin_lock(&dev->power.lock); + else + spin_lock_irq(&dev->power.lock); + + if (retval) + goto fail; + } + + dev->power.suspend_time = qos.time_now; + dev->power.max_time_suspended_ns = qos.constraint_ns ? : -1; + if (dev->pm_domain) callback = dev->pm_domain->ops.runtime_suspend; else if (dev->type && dev->type->pm) @@ -414,27 +486,9 @@ static int rpm_suspend(struct device *dev, int rpmflags) callback = NULL; retval = rpm_callback(callback, dev); - if (retval) { - __update_runtime_status(dev, RPM_ACTIVE); - dev->power.deferred_resume = false; - if (retval == -EAGAIN || retval == -EBUSY) { - dev->power.runtime_error = 0; + if (retval) + goto fail; - /* - * If the callback routine failed an autosuspend, and - * if the last_busy time has been updated so that there - * is a new autosuspend expiration time, automatically - * reschedule another autosuspend. - */ - if ((rpmflags & RPM_AUTO) && - pm_runtime_autosuspend_expiration(dev) != 0) - goto repeat; - } else { - pm_runtime_cancel_pending(dev); - } - wake_up_all(&dev->power.wait_queue); - goto out; - } no_callback: __update_runtime_status(dev, RPM_SUSPENDED); pm_runtime_deactivate_timer(dev); @@ -466,6 +520,29 @@ static int rpm_suspend(struct device *dev, int rpmflags) trace_rpm_return_int(dev, _THIS_IP_, retval); return retval; + + fail: + __update_runtime_status(dev, RPM_ACTIVE); + dev->power.suspend_time = ktime_set(0, 0); + dev->power.max_time_suspended_ns = -1; + dev->power.deferred_resume = false; + if (retval == -EAGAIN || retval == -EBUSY) { + dev->power.runtime_error = 0; + + /* + * If the callback routine failed an autosuspend, and + * if the last_busy time has been updated so that there + * is a new autosuspend expiration time, automatically + * reschedule another autosuspend. + */ + if ((rpmflags & RPM_AUTO) && + pm_runtime_autosuspend_expiration(dev) != 0) + goto repeat; + } else { + pm_runtime_cancel_pending(dev); + } + wake_up_all(&dev->power.wait_queue); + goto out; } /** @@ -620,6 +697,9 @@ static int rpm_resume(struct device *dev, int rpmflags) if (dev->power.no_callbacks) goto no_callback; /* Assume success. */ + dev->power.suspend_time = ktime_set(0, 0); + dev->power.max_time_suspended_ns = -1; + __update_runtime_status(dev, RPM_RESUMING); if (dev->pm_domain) @@ -1279,6 +1359,9 @@ void pm_runtime_init(struct device *dev) setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn, (unsigned long)dev); + dev->power.suspend_time = ktime_set(0, 0); + dev->power.max_time_suspended_ns = -1; + init_waitqueue_head(&dev->power.wait_queue); } @@ -1296,3 +1379,28 @@ void pm_runtime_remove(struct device *dev) if (dev->power.irq_safe && dev->parent) pm_runtime_put_sync(dev->parent); } + +/** + * pm_runtime_update_max_time_suspended - Update device's suspend time data. + * @dev: Device to handle. + * @delta_ns: Value to subtract from the device's max_time_suspended_ns field. + * + * Update the device's power.max_time_suspended_ns field by subtracting + * @delta_ns from it. The resulting value of power.max_time_suspended_ns is + * never negative. + */ +void pm_runtime_update_max_time_suspended(struct device *dev, s64 delta_ns) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (delta_ns > 0 && dev->power.max_time_suspended_ns > 0) { + if (dev->power.max_time_suspended_ns > delta_ns) + dev->power.max_time_suspended_ns -= delta_ns; + else + dev->power.max_time_suspended_ns = 0; + } + + spin_unlock_irqrestore(&dev->power.lock, flags); +} diff --git a/include/linux/pm.h b/include/linux/pm.h index 3f3ed83..a7676ef 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -521,6 +521,8 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; + ktime_t suspend_time; + s64 max_time_suspended_ns; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ struct pm_qos_constraints *constraints; diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 83b0ea3..775a323 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -78,6 +78,7 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier); int pm_qos_request_active(struct pm_qos_request *req); s32 pm_qos_read_value(struct pm_qos_constraints *c); +s32 __dev_pm_qos_read_value(struct device *dev); s32 dev_pm_qos_read_value(struct device *dev); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, s32 value); @@ -119,6 +120,8 @@ static inline int pm_qos_request_active(struct pm_qos_request *req) static inline s32 pm_qos_read_value(struct pm_qos_constraints *c) { return 0; } +static inline s32 __dev_pm_qos_read_value(struct device *dev) + { return 0; } static inline s32 dev_pm_qos_read_value(struct device *dev) { return 0; } static inline int dev_pm_qos_add_request(struct device *dev, diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index d3085e7..609daae 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -45,6 +45,8 @@ extern void pm_runtime_irq_safe(struct device *dev); extern void __pm_runtime_use_autosuspend(struct device *dev, bool use); extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay); extern unsigned long pm_runtime_autosuspend_expiration(struct device *dev); +extern void pm_runtime_update_max_time_suspended(struct device *dev, + s64 delta_ns); static inline bool pm_children_suspended(struct device *dev) { @@ -148,6 +150,9 @@ static inline void pm_runtime_set_autosuspend_delay(struct device *dev, static inline unsigned long pm_runtime_autosuspend_expiration( struct device *dev) { return 0; } +static inline void pm_runtime_update_max_time_suspended(struct device *dev, + s64 delta_ns) {} + #endif /* !CONFIG_PM_RUNTIME */ static inline int pm_runtime_idle(struct device *dev) -- cgit v0.10.2 From d5e4cbfe2049fca375cb19c4bc0cf676e8b4a88a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:36 +0100 Subject: PM / Domains: Make it possible to use per-device domain callbacks The current generic PM domains code requires that the same .stop(), .start() and .active_wakeup() device callback routines be used for all devices in the given domain, which is inflexible and may not cover some specific use cases. For this reason, make it possible to use device specific .start()/.stop() and .active_wakeup() callback routines by adding corresponding callback pointers to struct generic_pm_domain_data. Add a new helper routine, pm_genpd_register_callbacks(), that can be used to populate the new per-device callback pointers. Modify the shmobile's power domains code to allow drivers to add their own code to be run during the device stop and start operations with the help of the new callback pointers. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 34bbcbf..6777bb1 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -156,7 +156,10 @@ static void sh7372_a4r_suspend(void) static bool pd_active_wakeup(struct device *dev) { - return true; + bool (*active_wakeup)(struct device *dev); + + active_wakeup = dev_gpd_data(dev)->ops.active_wakeup; + return active_wakeup ? active_wakeup(dev) : true; } static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain) @@ -168,15 +171,44 @@ struct dev_power_governor sh7372_always_on_gov = { .power_down_ok = sh7372_power_down_forbidden, }; +static int sh7372_stop_dev(struct device *dev) +{ + int (*stop)(struct device *dev); + + stop = dev_gpd_data(dev)->ops.stop; + if (stop) { + int ret = stop(dev); + if (ret) + return ret; + } + return pm_clk_suspend(dev); +} + +static int sh7372_start_dev(struct device *dev) +{ + int (*start)(struct device *dev); + int ret; + + ret = pm_clk_resume(dev); + if (ret) + return ret; + + start = dev_gpd_data(dev)->ops.start; + if (start) + ret = start(dev); + + return ret; +} + void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd) { struct generic_pm_domain *genpd = &sh7372_pd->genpd; pm_genpd_init(genpd, sh7372_pd->gov, false); - genpd->stop_device = pm_clk_suspend; - genpd->start_device = pm_clk_resume; + genpd->dev_ops.stop = sh7372_stop_dev; + genpd->dev_ops.start = sh7372_start_dev; + genpd->dev_ops.active_wakeup = pd_active_wakeup; genpd->dev_irq_safe = true; - genpd->active_wakeup = pd_active_wakeup; genpd->power_off = pd_power_down; genpd->power_on = pd_power_up; __pd_power_up(sh7372_pd, false); diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 6790cf7..94afaa2 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -15,6 +15,23 @@ #include #include #include +#include + +#define GENPD_DEV_CALLBACK(genpd, type, callback, dev) \ +({ \ + type (*__routine)(struct device *__d); \ + type __ret = (type)0; \ + \ + __routine = genpd->dev_ops.callback; \ + if (__routine) { \ + __ret = __routine(dev); \ + } else { \ + __routine = dev_gpd_data(dev)->ops.callback; \ + if (__routine) \ + __ret = __routine(dev); \ + } \ + __ret; \ +}) static LIST_HEAD(gpd_list); static DEFINE_MUTEX(gpd_list_lock); @@ -29,6 +46,16 @@ static struct generic_pm_domain *dev_to_genpd(struct device *dev) return pd_to_genpd(dev->pm_domain); } +static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, stop, dev); +} + +static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, start, dev); +} + static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) { bool ret = false; @@ -199,13 +226,9 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, mutex_unlock(&genpd->lock); if (drv && drv->pm && drv->pm->runtime_suspend) { - if (genpd->start_device) - genpd->start_device(dev); - + genpd_start_dev(genpd, dev); ret = drv->pm->runtime_suspend(dev); - - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); } mutex_lock(&genpd->lock); @@ -235,13 +258,9 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd, mutex_unlock(&genpd->lock); if (drv && drv->pm && drv->pm->runtime_resume) { - if (genpd->start_device) - genpd->start_device(dev); - + genpd_start_dev(genpd, dev); drv->pm->runtime_resume(dev); - - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); } mutex_lock(&genpd->lock); @@ -413,6 +432,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) static int pm_genpd_runtime_suspend(struct device *dev) { struct generic_pm_domain *genpd; + int ret; dev_dbg(dev, "%s()\n", __func__); @@ -422,11 +442,9 @@ static int pm_genpd_runtime_suspend(struct device *dev) might_sleep_if(!genpd->dev_irq_safe); - if (genpd->stop_device) { - int ret = genpd->stop_device(dev); - if (ret) - return ret; - } + ret = genpd_stop_dev(genpd, dev); + if (ret) + return ret; /* * If power.irq_safe is set, this routine will be run with interrupts @@ -502,8 +520,7 @@ static int pm_genpd_runtime_resume(struct device *dev) mutex_unlock(&genpd->lock); out: - if (genpd->start_device) - genpd->start_device(dev); + genpd_start_dev(genpd, dev); return 0; } @@ -534,6 +551,12 @@ static inline void genpd_power_off_work_fn(struct work_struct *work) {} #ifdef CONFIG_PM_SLEEP +static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, + struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev); +} + /** * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. @@ -590,7 +613,7 @@ static bool resume_needed(struct device *dev, struct generic_pm_domain *genpd) if (!device_can_wakeup(dev)) return false; - active_wakeup = genpd->active_wakeup && genpd->active_wakeup(dev); + active_wakeup = genpd_dev_active_wakeup(genpd, dev); return device_may_wakeup(dev) ? active_wakeup : !active_wakeup; } @@ -646,7 +669,7 @@ static int pm_genpd_prepare(struct device *dev) /* * The PM domain must be in the GPD_STATE_ACTIVE state at this point, * so pm_genpd_poweron() will return immediately, but if the device - * is suspended (e.g. it's been stopped by .stop_device()), we need + * is suspended (e.g. it's been stopped by genpd_stop_dev()), we need * to make it operational. */ pm_runtime_resume(dev); @@ -714,12 +737,10 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (ret) return ret; - if (dev->power.wakeup_path - && genpd->active_wakeup && genpd->active_wakeup(dev)) + if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); /* * Since all of the "noirq" callbacks are executed sequentially, it is @@ -761,8 +782,7 @@ static int pm_genpd_resume_noirq(struct device *dev) */ pm_genpd_poweron(genpd); genpd->suspended_count--; - if (genpd->start_device) - genpd->start_device(dev); + genpd_start_dev(genpd, dev); return pm_generic_resume_noirq(dev); } @@ -836,8 +856,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (ret) return ret; - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); return 0; } @@ -864,8 +883,7 @@ static int pm_genpd_thaw_noirq(struct device *dev) if (genpd->suspend_power_off) return 0; - if (genpd->start_device) - genpd->start_device(dev); + genpd_start_dev(genpd, dev); return pm_generic_thaw_noirq(dev); } @@ -938,12 +956,10 @@ static int pm_genpd_dev_poweroff_noirq(struct device *dev) if (ret) return ret; - if (dev->power.wakeup_path - && genpd->active_wakeup && genpd->active_wakeup(dev)) + if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) return 0; - if (genpd->stop_device) - genpd->stop_device(dev); + genpd_stop_dev(genpd, dev); /* * Since all of the "noirq" callbacks are executed sequentially, it is @@ -993,8 +1009,7 @@ static int pm_genpd_restore_noirq(struct device *dev) pm_genpd_poweron(genpd); genpd->suspended_count--; - if (genpd->start_device) - genpd->start_device(dev); + genpd_start_dev(genpd, dev); return pm_generic_restore_noirq(dev); } @@ -1280,6 +1295,69 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, } /** + * pm_genpd_add_callbacks - Add PM domain callbacks to a given device. + * @dev: Device to add the callbacks to. + * @ops: Set of callbacks to add. + */ +int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops) +{ + struct pm_domain_data *pdd; + int ret = 0; + + if (!(dev && dev->power.subsys_data && ops)) + return -EINVAL; + + pm_runtime_disable(dev); + device_pm_lock(); + + pdd = dev->power.subsys_data->domain_data; + if (pdd) { + struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); + + gpd_data->ops = *ops; + } else { + ret = -EINVAL; + } + + device_pm_unlock(); + pm_runtime_enable(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); + +/** + * pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. + * @dev: Device to remove the callbacks from. + */ +int pm_genpd_remove_callbacks(struct device *dev) +{ + struct pm_domain_data *pdd; + int ret = 0; + + if (!(dev && dev->power.subsys_data)) + return -EINVAL; + + pm_runtime_disable(dev); + device_pm_lock(); + + pdd = dev->power.subsys_data->domain_data; + if (pdd) { + struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); + + gpd_data->ops = (struct gpd_dev_ops){ 0 }; + } else { + ret = -EINVAL; + } + + device_pm_unlock(); + pm_runtime_enable(dev); + + return ret; +} +EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks); + +/** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. * @gov: PM domain governor to associate with the domain (may be NULL). diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 65633e5..8949d2d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -23,6 +23,12 @@ struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); }; +struct gpd_dev_ops { + int (*start)(struct device *dev); + int (*stop)(struct device *dev); + bool (*active_wakeup)(struct device *dev); +}; + struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ @@ -45,9 +51,7 @@ struct generic_pm_domain { bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); - int (*start_device)(struct device *dev); - int (*stop_device)(struct device *dev); - bool (*active_wakeup)(struct device *dev); + struct gpd_dev_ops dev_ops; }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) @@ -64,6 +68,7 @@ struct gpd_link { struct generic_pm_domain_data { struct pm_domain_data base; + struct gpd_dev_ops ops; bool need_restore; }; @@ -73,6 +78,11 @@ static inline struct generic_pm_domain_data *to_gpd_data(struct pm_domain_data * } #ifdef CONFIG_PM_GENERIC_DOMAINS +static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) +{ + return to_gpd_data(dev->power.subsys_data->domain_data); +} + extern int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev); extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, @@ -81,6 +91,8 @@ extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); +extern int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops); +extern int pm_genpd_remove_callbacks(struct device *dev); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_poweron(struct generic_pm_domain *genpd); @@ -105,6 +117,15 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } +static inline int pm_genpd_add_callbacks(struct device *dev, + struct gpd_dev_ops *ops) +{ + return -ENOSYS; +} +static inline int pm_genpd_remove_callbacks(struct device *dev) +{ + return -ENOSYS; +} static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) {} static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) -- cgit v0.10.2 From ecf00475f229fcf06362412ad2d15a3267e354a1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:44 +0100 Subject: PM / Domains: Introduce "save/restore state" device callbacks The current PM domains code uses device drivers' .runtime_suspend() and .runtime_resume() callbacks as the "save device state" and "restore device state" operations, which may not be appropriate in general, because it forces drivers to assume that they always will be used with generic PM domains. However, in theory, the same hardware may be used in devices that don't belong to any PM domain, in which case it would be necessary to add "fake" PM domains to satisfy the above assumption. It also may be located in a PM domain that's not handled with the help of the generic code. To allow device drivers that may be used along with the generic PM domains code of more flexibility, introduce new device callbacks, .save_state() and .restore_state(), that can be supplied by the drivers in addition to their "standard" runtime PM callbacks. This will allow the drivers to be designed to work with generic PM domains as well as without them. For backwards compatibility, introduce default .save_state() and .restore_state() callback routines for PM domains that will execute a device driver's .runtime_suspend() and .runtime_resume() callbacks, respectively, for the given device if the driver doesn't provide its own implementations of .save_state() and .restore_state(). Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 94afaa2..3c9451b 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -56,6 +56,16 @@ static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) return GENPD_DEV_CALLBACK(genpd, int, start, dev); } +static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, save_state, dev); +} + +static int genpd_restore_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev); +} + static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) { bool ret = false; @@ -217,7 +227,6 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, { struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); struct device *dev = pdd->dev; - struct device_driver *drv = dev->driver; int ret = 0; if (gpd_data->need_restore) @@ -225,11 +234,9 @@ static int __pm_genpd_save_device(struct pm_domain_data *pdd, mutex_unlock(&genpd->lock); - if (drv && drv->pm && drv->pm->runtime_suspend) { - genpd_start_dev(genpd, dev); - ret = drv->pm->runtime_suspend(dev); - genpd_stop_dev(genpd, dev); - } + genpd_start_dev(genpd, dev); + ret = genpd_save_dev(genpd, dev); + genpd_stop_dev(genpd, dev); mutex_lock(&genpd->lock); @@ -250,18 +257,15 @@ static void __pm_genpd_restore_device(struct pm_domain_data *pdd, { struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); struct device *dev = pdd->dev; - struct device_driver *drv = dev->driver; if (!gpd_data->need_restore) return; mutex_unlock(&genpd->lock); - if (drv && drv->pm && drv->pm->runtime_resume) { - genpd_start_dev(genpd, dev); - drv->pm->runtime_resume(dev); - genpd_stop_dev(genpd, dev); - } + genpd_start_dev(genpd, dev); + genpd_restore_dev(genpd, dev); + genpd_stop_dev(genpd, dev); mutex_lock(&genpd->lock); @@ -1358,6 +1362,44 @@ int pm_genpd_remove_callbacks(struct device *dev) EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks); /** + * pm_genpd_default_save_state - Default "save device state" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_save_state(struct device *dev) +{ + int (*cb)(struct device *__dev); + struct device_driver *drv = dev->driver; + + cb = dev_gpd_data(dev)->ops.save_state; + if (cb) + return cb(dev); + + if (drv && drv->pm && drv->pm->runtime_suspend) + return drv->pm->runtime_suspend(dev); + + return 0; +} + +/** + * pm_genpd_default_restore_state - Default PM domians "restore device state". + * @dev: Device to handle. + */ +static int pm_genpd_default_restore_state(struct device *dev) +{ + int (*cb)(struct device *__dev); + struct device_driver *drv = dev->driver; + + cb = dev_gpd_data(dev)->ops.restore_state; + if (cb) + return cb(dev); + + if (drv && drv->pm && drv->pm->runtime_resume) + return drv->pm->runtime_resume(dev); + + return 0; +} + +/** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. * @gov: PM domain governor to associate with the domain (may be NULL). @@ -1400,6 +1442,8 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; genpd->domain.ops.restore = pm_genpd_restore; genpd->domain.ops.complete = pm_genpd_complete; + genpd->dev_ops.save_state = pm_genpd_default_save_state; + genpd->dev_ops.restore_state = pm_genpd_default_restore_state; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 8949d2d..731080d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -26,6 +26,8 @@ struct dev_power_governor { struct gpd_dev_ops { int (*start)(struct device *dev); int (*stop)(struct device *dev); + int (*save_state)(struct device *dev); + int (*restore_state)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v0.10.2 From d23b9b00cdde5c93b914a172cecd57d5625fcd04 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 27 Nov 2011 13:11:51 +0100 Subject: PM / Domains: Rework system suspend callback routines (v2) The current generic PM domains code attempts to use the generic system suspend operations along with the domains' device stop/start routines, which requires device drivers to assume that their system suspend/resume (and hibernation/restore) callbacks will always be used with generic PM domains. However, in theory, the same hardware may be used in devices that don't belong to any PM domain, in which case it would be necessary to add "fake" PM domains to satisfy the above assumption. Also, the domain the hardware belongs to may not be handled with the help of the generic code. To allow device drivers that may be used along with the generic PM domains code of more flexibility, add new device callbacks, .suspend(), .suspend_late(), .resume_early(), .resume(), .freeze(), .freeze_late(), .thaw_early(), and .thaw(), that can be supplied by the drivers in addition to their "standard" system suspend and hibernation callbacks. These new callbacks, if defined, will be used by the generic PM domains code for the handling of system suspend and hibernation instead of the "standard" ones. This will allow drivers to be designed to work with generic PM domains as well as without them. For backwards compatibility, introduce default implementations of the new callbacks for PM domains that will execute pm_generic_suspend(), pm_generic_suspend_noirq(), pm_generic_resume_noirq(), pm_generic_resume(), pm_generic_freeze(), pm_generic_freeze_noirq(), pm_generic_thaw_noirq(), and pm_generic_thaw(), respectively, for the given device if its driver doesn't define those callbacks. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3c9451b..9a77080 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -561,6 +561,46 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev); } +static int genpd_suspend_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, suspend, dev); +} + +static int genpd_suspend_late(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, suspend_late, dev); +} + +static int genpd_resume_early(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, resume_early, dev); +} + +static int genpd_resume_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, resume, dev); +} + +static int genpd_freeze_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, freeze, dev); +} + +static int genpd_freeze_late(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, freeze_late, dev); +} + +static int genpd_thaw_early(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, thaw_early, dev); +} + +static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev) +{ + return GENPD_DEV_CALLBACK(genpd, int, thaw, dev); +} + /** * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. @@ -712,7 +752,7 @@ static int pm_genpd_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev); + return genpd->suspend_power_off ? 0 : genpd_suspend_dev(genpd, dev); } /** @@ -737,7 +777,7 @@ static int pm_genpd_suspend_noirq(struct device *dev) if (genpd->suspend_power_off) return 0; - ret = pm_generic_suspend_noirq(dev); + ret = genpd_suspend_late(genpd, dev); if (ret) return ret; @@ -788,7 +828,7 @@ static int pm_genpd_resume_noirq(struct device *dev) genpd->suspended_count--; genpd_start_dev(genpd, dev); - return pm_generic_resume_noirq(dev); + return genpd_resume_early(genpd, dev); } /** @@ -809,7 +849,7 @@ static int pm_genpd_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : pm_generic_resume(dev); + return genpd->suspend_power_off ? 0 : genpd_resume_dev(genpd, dev); } /** @@ -830,7 +870,7 @@ static int pm_genpd_freeze(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev); + return genpd->suspend_power_off ? 0 : genpd_freeze_dev(genpd, dev); } /** @@ -856,7 +896,7 @@ static int pm_genpd_freeze_noirq(struct device *dev) if (genpd->suspend_power_off) return 0; - ret = pm_generic_freeze_noirq(dev); + ret = genpd_freeze_late(genpd, dev); if (ret) return ret; @@ -889,7 +929,7 @@ static int pm_genpd_thaw_noirq(struct device *dev) genpd_start_dev(genpd, dev); - return pm_generic_thaw_noirq(dev); + return genpd_thaw_early(genpd, dev); } /** @@ -910,70 +950,7 @@ static int pm_genpd_thaw(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev); -} - -/** - * pm_genpd_dev_poweroff - Power off a device belonging to an I/O PM domain. - * @dev: Device to suspend. - * - * Power off a device under the assumption that its pm_domain field points to - * the domain member of an object of type struct generic_pm_domain representing - * a PM domain consisting of I/O devices. - */ -static int pm_genpd_dev_poweroff(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_poweroff(dev); -} - -/** - * pm_genpd_dev_poweroff_noirq - Late power off of a device from a PM domain. - * @dev: Device to suspend. - * - * Carry out a late powering off of a device under the assumption that its - * pm_domain field points to the domain member of an object of type - * struct generic_pm_domain representing a PM domain consisting of I/O devices. - */ -static int pm_genpd_dev_poweroff_noirq(struct device *dev) -{ - struct generic_pm_domain *genpd; - int ret; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - if (genpd->suspend_power_off) - return 0; - - ret = pm_generic_poweroff_noirq(dev); - if (ret) - return ret; - - if (dev->power.wakeup_path && genpd_dev_active_wakeup(genpd, dev)) - return 0; - - genpd_stop_dev(genpd, dev); - - /* - * Since all of the "noirq" callbacks are executed sequentially, it is - * guaranteed that this function will never run twice in parallel for - * the same PM domain, so it is not necessary to use locking here. - */ - genpd->suspended_count++; - pm_genpd_sync_poweroff(genpd); - - return 0; + return genpd->suspend_power_off ? 0 : genpd_thaw_dev(genpd, dev); } /** @@ -1015,28 +992,7 @@ static int pm_genpd_restore_noirq(struct device *dev) genpd->suspended_count--; genpd_start_dev(genpd, dev); - return pm_generic_restore_noirq(dev); -} - -/** - * pm_genpd_restore - Restore a device belonging to an I/O power domain. - * @dev: Device to resume. - * - * Restore a device under the assumption that its pm_domain field points to the - * domain member of an object of type struct generic_pm_domain representing - * a power domain consisting of I/O devices. - */ -static int pm_genpd_restore(struct device *dev) -{ - struct generic_pm_domain *genpd; - - dev_dbg(dev, "%s()\n", __func__); - - genpd = dev_to_genpd(dev); - if (IS_ERR(genpd)) - return -EINVAL; - - return genpd->suspend_power_off ? 0 : pm_generic_restore(dev); + return genpd_resume_early(genpd, dev); } /** @@ -1086,10 +1042,7 @@ static void pm_genpd_complete(struct device *dev) #define pm_genpd_freeze_noirq NULL #define pm_genpd_thaw_noirq NULL #define pm_genpd_thaw NULL -#define pm_genpd_dev_poweroff_noirq NULL -#define pm_genpd_dev_poweroff NULL #define pm_genpd_restore_noirq NULL -#define pm_genpd_restore NULL #define pm_genpd_complete NULL #endif /* CONFIG_PM_SLEEP */ @@ -1361,6 +1314,8 @@ int pm_genpd_remove_callbacks(struct device *dev) } EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks); +/* Default device callbacks for generic PM domains. */ + /** * pm_genpd_default_save_state - Default "save device state" for PM domians. * @dev: Device to handle. @@ -1400,6 +1355,94 @@ static int pm_genpd_default_restore_state(struct device *dev) } /** + * pm_genpd_default_suspend - Default "device suspend" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_suspend(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze; + + return cb ? cb(dev) : pm_generic_suspend(dev); +} + +/** + * pm_genpd_default_suspend_late - Default "late device suspend" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_suspend_late(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; + + return cb ? cb(dev) : pm_generic_suspend_noirq(dev); +} + +/** + * pm_genpd_default_resume_early - Default "early device resume" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_resume_early(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; + + return cb ? cb(dev) : pm_generic_resume_noirq(dev); +} + +/** + * pm_genpd_default_resume - Default "device resume" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_resume(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw; + + return cb ? cb(dev) : pm_generic_resume(dev); +} + +/** + * pm_genpd_default_freeze - Default "device freeze" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_freeze(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze; + + return cb ? cb(dev) : pm_generic_freeze(dev); +} + +/** + * pm_genpd_default_freeze_late - Default "late device freeze" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_freeze_late(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; + + return cb ? cb(dev) : pm_generic_freeze_noirq(dev); +} + +/** + * pm_genpd_default_thaw_early - Default "early device thaw" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_thaw_early(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; + + return cb ? cb(dev) : pm_generic_thaw_noirq(dev); +} + +/** + * pm_genpd_default_thaw - Default "device thaw" for PM domians. + * @dev: Device to handle. + */ +static int pm_genpd_default_thaw(struct device *dev) +{ + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw; + + return cb ? cb(dev) : pm_generic_thaw(dev); +} + +/** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. * @gov: PM domain governor to associate with the domain (may be NULL). @@ -1437,13 +1480,21 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq; genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq; genpd->domain.ops.thaw = pm_genpd_thaw; - genpd->domain.ops.poweroff = pm_genpd_dev_poweroff; - genpd->domain.ops.poweroff_noirq = pm_genpd_dev_poweroff_noirq; + genpd->domain.ops.poweroff = pm_genpd_suspend; + genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq; genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq; - genpd->domain.ops.restore = pm_genpd_restore; + genpd->domain.ops.restore = pm_genpd_resume; genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; + genpd->dev_ops.freeze = pm_genpd_default_suspend; + genpd->dev_ops.freeze_late = pm_genpd_default_suspend_late; + genpd->dev_ops.thaw_early = pm_genpd_default_resume_early; + genpd->dev_ops.thaw = pm_genpd_default_resume; + genpd->dev_ops.freeze = pm_genpd_default_freeze; + genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late; + genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early; + genpd->dev_ops.thaw = pm_genpd_default_thaw; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 731080d..10a197d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -28,6 +28,14 @@ struct gpd_dev_ops { int (*stop)(struct device *dev); int (*save_state)(struct device *dev); int (*restore_state)(struct device *dev); + int (*suspend)(struct device *dev); + int (*suspend_late)(struct device *dev); + int (*resume_early)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*freeze_late)(struct device *dev); + int (*thaw_early)(struct device *dev); + int (*thaw)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v0.10.2 From b02c999ac325e977585abeb4caf6e0a2ee21e30b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:02:05 +0100 Subject: PM / Domains: Add device stop governor function (v4) Add a function deciding whether or not devices should be stopped in pm_genpd_runtime_suspend() depending on their PM QoS constraints and stop/start timing values. Make it possible to add information used by this function to device objects. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 6777bb1..adf1765 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -169,6 +169,7 @@ static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain) struct dev_power_governor sh7372_always_on_gov = { .power_down_ok = sh7372_power_down_forbidden, + .stop_ok = default_stop_ok, }; static int sh7372_stop_dev(struct device *dev) @@ -203,8 +204,9 @@ static int sh7372_start_dev(struct device *dev) void sh7372_init_pm_domain(struct sh7372_pm_domain *sh7372_pd) { struct generic_pm_domain *genpd = &sh7372_pd->genpd; + struct dev_power_governor *gov = sh7372_pd->gov; - pm_genpd_init(genpd, sh7372_pd->gov, false); + pm_genpd_init(genpd, gov ? : &simple_qos_governor, false); genpd->dev_ops.stop = sh7372_stop_dev; genpd->dev_ops.start = sh7372_start_dev; genpd->dev_ops.active_wakeup = pd_active_wakeup; diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 81676dd..2e58ebb 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o obj-$(CONFIG_PM_OPP) += opp.o -obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o +obj-$(CONFIG_PM_GENERIC_DOMAINS) += domain.o domain_governor.o obj-$(CONFIG_HAVE_CLK) += clock_ops.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9a77080..3af9f5a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -38,7 +38,7 @@ static DEFINE_MUTEX(gpd_list_lock); #ifdef CONFIG_PM -static struct generic_pm_domain *dev_to_genpd(struct device *dev) +struct generic_pm_domain *dev_to_genpd(struct device *dev) { if (IS_ERR_OR_NULL(dev->pm_domain)) return ERR_PTR(-EINVAL); @@ -436,6 +436,7 @@ static void genpd_power_off_work_fn(struct work_struct *work) static int pm_genpd_runtime_suspend(struct device *dev) { struct generic_pm_domain *genpd; + bool (*stop_ok)(struct device *__dev); int ret; dev_dbg(dev, "%s()\n", __func__); @@ -446,10 +447,17 @@ static int pm_genpd_runtime_suspend(struct device *dev) might_sleep_if(!genpd->dev_irq_safe); + stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL; + if (stop_ok && !stop_ok(dev)) + return -EBUSY; + ret = genpd_stop_dev(genpd, dev); if (ret) return ret; + pm_runtime_update_max_time_suspended(dev, + dev_gpd_data(dev)->td.start_latency_ns); + /* * If power.irq_safe is set, this routine will be run with interrupts * off, so it can't use mutexes. @@ -1048,11 +1056,13 @@ static void pm_genpd_complete(struct device *dev) #endif /* CONFIG_PM_SLEEP */ /** - * pm_genpd_add_device - Add a device to an I/O PM domain. + * __pm_genpd_add_device - Add a device to an I/O PM domain. * @genpd: PM domain to add the device to. * @dev: Device to be added. + * @td: Set of PM QoS timing parameters to attach to the device. */ -int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) +int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, + struct gpd_timing_data *td) { struct generic_pm_domain_data *gpd_data; struct pm_domain_data *pdd; @@ -1095,6 +1105,8 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) gpd_data->base.dev = dev; gpd_data->need_restore = false; list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); + if (td) + gpd_data->td = *td; out: genpd_release_lock(genpd); @@ -1255,8 +1267,10 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, * pm_genpd_add_callbacks - Add PM domain callbacks to a given device. * @dev: Device to add the callbacks to. * @ops: Set of callbacks to add. + * @td: Timing data to add to the device along with the callbacks (optional). */ -int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops) +int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops, + struct gpd_timing_data *td) { struct pm_domain_data *pdd; int ret = 0; @@ -1272,6 +1286,8 @@ int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops) struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); gpd_data->ops = *ops; + if (td) + gpd_data->td = *td; } else { ret = -EINVAL; } @@ -1284,10 +1300,11 @@ int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops) EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); /** - * pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. + * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. * @dev: Device to remove the callbacks from. + * @clear_td: If set, clear the device's timing data too. */ -int pm_genpd_remove_callbacks(struct device *dev) +int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) { struct pm_domain_data *pdd; int ret = 0; @@ -1303,6 +1320,8 @@ int pm_genpd_remove_callbacks(struct device *dev) struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); gpd_data->ops = (struct gpd_dev_ops){ 0 }; + if (clear_td) + gpd_data->td = (struct gpd_timing_data){ 0 }; } else { ret = -EINVAL; } @@ -1312,7 +1331,7 @@ int pm_genpd_remove_callbacks(struct device *dev) return ret; } -EXPORT_SYMBOL_GPL(pm_genpd_remove_callbacks); +EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks); /* Default device callbacks for generic PM domains. */ diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c new file mode 100644 index 0000000..97b21c1 --- /dev/null +++ b/drivers/base/power/domain_governor.c @@ -0,0 +1,33 @@ +/* + * drivers/base/power/domain_governor.c - Governors for device PM domains. + * + * Copyright (C) 2011 Rafael J. Wysocki , Renesas Electronics Corp. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include + +/** + * default_stop_ok - Default PM domain governor routine for stopping devices. + * @dev: Device to check. + */ +bool default_stop_ok(struct device *dev) +{ + struct gpd_timing_data *td = &dev_gpd_data(dev)->td; + + dev_dbg(dev, "%s()\n", __func__); + + if (dev->power.max_time_suspended_ns < 0 || td->break_even_ns == 0) + return true; + + return td->stop_latency_ns + td->start_latency_ns < td->break_even_ns + && td->break_even_ns < dev->power.max_time_suspended_ns; +} + +struct dev_power_governor simple_qos_governor = { + .stop_ok = default_stop_ok, +}; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 10a197d..f6745c2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -21,6 +21,7 @@ enum gpd_status { struct dev_power_governor { bool (*power_down_ok)(struct dev_pm_domain *domain); + bool (*stop_ok)(struct device *dev); }; struct gpd_dev_ops { @@ -76,9 +77,16 @@ struct gpd_link { struct list_head slave_node; }; +struct gpd_timing_data { + s64 stop_latency_ns; + s64 start_latency_ns; + s64 break_even_ns; +}; + struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_dev_ops ops; + struct gpd_timing_data td; bool need_restore; }; @@ -93,20 +101,48 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern int pm_genpd_add_device(struct generic_pm_domain *genpd, - struct device *dev); +extern struct dev_power_governor simple_qos_governor; + +extern struct generic_pm_domain *dev_to_genpd(struct device *dev); +extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev, + struct gpd_timing_data *td); + +static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev) +{ + return __pm_genpd_add_device(genpd, dev, NULL); +} + extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops); -extern int pm_genpd_remove_callbacks(struct device *dev); +extern int pm_genpd_add_callbacks(struct device *dev, + struct gpd_dev_ops *ops, + struct gpd_timing_data *td); +extern int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); + extern int pm_genpd_poweron(struct generic_pm_domain *genpd); + +extern bool default_stop_ok(struct device *dev); + #else + +static inline struct generic_pm_domain *dev_to_genpd(struct device *dev) +{ + return ERR_PTR(-ENOSYS); +} +static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, + struct device *dev, + struct gpd_timing_data *td) +{ + return -ENOSYS; +} static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) { @@ -128,22 +164,33 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, return -ENOSYS; } static inline int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops) + struct gpd_dev_ops *ops, + struct gpd_timing_data *td) { return -ENOSYS; } -static inline int pm_genpd_remove_callbacks(struct device *dev) +static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) { return -ENOSYS; } -static inline void pm_genpd_init(struct generic_pm_domain *genpd, - struct dev_power_governor *gov, bool is_off) {} +static inline void pm_genpd_init(struct generic_pm_domain *genpd, bool is_off) +{ +} static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } +static inline bool default_stop_ok(struct device *dev) +{ + return false; +} #endif +static inline int pm_genpd_remove_callbacks(struct device *dev) +{ + return __pm_genpd_remove_callbacks(dev, true); +} + #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); -- cgit v0.10.2 From 221e9b58380abdd6c05e11b4538597e2586ee141 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:02:10 +0100 Subject: PM / Domains: Add default power off governor function (v4) Add a function deciding whether or not a given PM domain should be powered off on the basis of the PM QoS constraints of devices belonging to it and their PM QoS timing data. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 3af9f5a..9189619 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -398,6 +398,17 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } genpd->status = GPD_STATE_POWER_OFF; + genpd->power_off_time = ktime_get(); + + /* Update PM QoS information for devices in the domain. */ + list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { + struct gpd_timing_data *td = &to_gpd_data(pdd)->td; + + pm_runtime_update_max_time_suspended(pdd->dev, + td->start_latency_ns + + td->restore_state_latency_ns + + genpd->power_on_latency_ns); + } list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_dec(link->master); @@ -1487,6 +1498,7 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->resume_count = 0; genpd->device_count = 0; genpd->suspended_count = 0; + genpd->max_off_time_ns = -1; genpd->domain.ops.runtime_suspend = pm_genpd_runtime_suspend; genpd->domain.ops.runtime_resume = pm_genpd_runtime_resume; genpd->domain.ops.runtime_idle = pm_generic_runtime_idle; diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 97b21c1..da78540 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -10,6 +10,7 @@ #include #include #include +#include /** * default_stop_ok - Default PM domain governor routine for stopping devices. @@ -28,6 +29,115 @@ bool default_stop_ok(struct device *dev) && td->break_even_ns < dev->power.max_time_suspended_ns; } +/** + * default_power_down_ok - Default generic PM domain power off governor routine. + * @pd: PM domain to check. + * + * This routine must be executed under the PM domain's lock. + */ +static bool default_power_down_ok(struct dev_pm_domain *pd) +{ + struct generic_pm_domain *genpd = pd_to_genpd(pd); + struct gpd_link *link; + struct pm_domain_data *pdd; + s64 min_dev_off_time_ns; + s64 off_on_time_ns; + ktime_t time_now = ktime_get(); + + off_on_time_ns = genpd->power_off_latency_ns + + genpd->power_on_latency_ns; + /* + * It doesn't make sense to remove power from the domain if saving + * the state of all devices in it and the power off/power on operations + * take too much time. + * + * All devices in this domain have been stopped already at this point. + */ + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + if (pdd->dev->driver) + off_on_time_ns += + to_gpd_data(pdd)->td.save_state_latency_ns; + } + + /* + * Check if subdomains can be off for enough time. + * + * All subdomains have been powered off already at this point. + */ + list_for_each_entry(link, &genpd->master_links, master_node) { + struct generic_pm_domain *sd = link->slave; + s64 sd_max_off_ns = sd->max_off_time_ns; + + if (sd_max_off_ns < 0) + continue; + + sd_max_off_ns -= ktime_to_ns(ktime_sub(time_now, + sd->power_off_time)); + /* + * Check if the subdomain is allowed to be off long enough for + * the current domain to turn off and on (that's how much time + * it will have to wait worst case). + */ + if (sd_max_off_ns <= off_on_time_ns) + return false; + } + + /* + * Check if the devices in the domain can be off enough time. + */ + min_dev_off_time_ns = -1; + list_for_each_entry(pdd, &genpd->dev_list, list_node) { + struct gpd_timing_data *td; + struct device *dev = pdd->dev; + s64 dev_off_time_ns; + + if (!dev->driver || dev->power.max_time_suspended_ns < 0) + continue; + + td = &to_gpd_data(pdd)->td; + dev_off_time_ns = dev->power.max_time_suspended_ns - + (td->start_latency_ns + td->restore_state_latency_ns + + ktime_to_ns(ktime_sub(time_now, + dev->power.suspend_time))); + if (dev_off_time_ns <= off_on_time_ns) + return false; + + if (min_dev_off_time_ns > dev_off_time_ns + || min_dev_off_time_ns < 0) + min_dev_off_time_ns = dev_off_time_ns; + } + + if (min_dev_off_time_ns < 0) { + /* + * There are no latency constraints, so the domain can spend + * arbitrary time in the "off" state. + */ + genpd->max_off_time_ns = -1; + return true; + } + + /* + * The difference between the computed minimum delta and the time needed + * to turn the domain on is the maximum theoretical time this domain can + * spend in the "off" state. + */ + min_dev_off_time_ns -= genpd->power_on_latency_ns; + + /* + * If the difference between the computed minimum delta and the time + * needed to turn the domain off and back on on is smaller than the + * domain's power break even time, removing power from the domain is not + * worth it. + */ + if (genpd->break_even_ns > + min_dev_off_time_ns - genpd->power_off_latency_ns) + return false; + + genpd->max_off_time_ns = min_dev_off_time_ns; + return true; +} + struct dev_power_governor simple_qos_governor = { .stop_ok = default_stop_ok, + .power_down_ok = default_power_down_ok, }; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f6745c2..cc1a245 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -61,8 +61,13 @@ struct generic_pm_domain { bool suspend_power_off; /* Power status before system suspend */ bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); + s64 power_off_latency_ns; int (*power_on)(struct generic_pm_domain *domain); + s64 power_on_latency_ns; struct gpd_dev_ops dev_ops; + s64 break_even_ns; /* Power break even for the entire domain. */ + s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ + ktime_t power_off_time; }; static inline struct generic_pm_domain *pd_to_genpd(struct dev_pm_domain *pd) @@ -80,6 +85,8 @@ struct gpd_link { struct gpd_timing_data { s64 stop_latency_ns; s64 start_latency_ns; + s64 save_state_latency_ns; + s64 restore_state_latency_ns; s64 break_even_ns; }; -- cgit v0.10.2 From 0140d8bd47f798d55c3720f7fcade9e50929a5e5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 1 Dec 2011 00:02:17 +0100 Subject: PM / Domains: Automatically update overoptimistic latency information Measure the time of execution of the .stop(), .start(), .save_state() and .restore_state() PM domain device callbacks and if the result is greater than the corresponding latency value stored in the device's struct generic_pm_domain_data object, replace the inaccurate value with the measured time. Do analogously for the PM domains' .power_off() and .power_off() callbacks. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 9189619..5a8d67d 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -33,6 +33,20 @@ __ret; \ }) +#define GENPD_DEV_TIMED_CALLBACK(genpd, type, callback, dev, field, name) \ +({ \ + ktime_t __start = ktime_get(); \ + type __retval = GENPD_DEV_CALLBACK(genpd, type, callback, dev); \ + s64 __elapsed = ktime_to_ns(ktime_sub(ktime_get(), __start)); \ + struct generic_pm_domain_data *__gpd_data = dev_gpd_data(dev); \ + if (__elapsed > __gpd_data->td.field) { \ + __gpd_data->td.field = __elapsed; \ + dev_warn(dev, name " latency exceeded, new value %lld ns\n", \ + __elapsed); \ + } \ + __retval; \ +}) + static LIST_HEAD(gpd_list); static DEFINE_MUTEX(gpd_list_lock); @@ -48,22 +62,27 @@ struct generic_pm_domain *dev_to_genpd(struct device *dev) static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev) { - return GENPD_DEV_CALLBACK(genpd, int, stop, dev); + return GENPD_DEV_TIMED_CALLBACK(genpd, int, stop, dev, + stop_latency_ns, "stop"); } static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) { - return GENPD_DEV_CALLBACK(genpd, int, start, dev); + return GENPD_DEV_TIMED_CALLBACK(genpd, int, start, dev, + start_latency_ns, "start"); } static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) { - return GENPD_DEV_CALLBACK(genpd, int, save_state, dev); + return GENPD_DEV_TIMED_CALLBACK(genpd, int, save_state, dev, + save_state_latency_ns, "state save"); } static int genpd_restore_dev(struct generic_pm_domain *genpd, struct device *dev) { - return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev); + return GENPD_DEV_TIMED_CALLBACK(genpd, int, restore_state, dev, + restore_state_latency_ns, + "state restore"); } static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd) @@ -182,9 +201,16 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) } if (genpd->power_on) { + ktime_t time_start = ktime_get(); + s64 elapsed_ns; + ret = genpd->power_on(genpd); if (ret) goto err; + + elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); + if (elapsed_ns > genpd->power_on_latency_ns) + genpd->power_on_latency_ns = elapsed_ns; } genpd_set_active(genpd); @@ -377,11 +403,16 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } if (genpd->power_off) { + ktime_t time_start; + s64 elapsed_ns; + if (atomic_read(&genpd->sd_count) > 0) { ret = -EBUSY; goto out; } + time_start = ktime_get(); + /* * If sd_count > 0 at this point, one of the subdomains hasn't * managed to call pm_genpd_poweron() for the master yet after @@ -395,6 +426,10 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) genpd_set_active(genpd); goto out; } + + elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); + if (elapsed_ns > genpd->power_off_latency_ns) + genpd->power_off_latency_ns = elapsed_ns; } genpd->status = GPD_STATE_POWER_OFF; -- cgit v0.10.2 From 4f042cdad40e1566a53b7ae85e72b6945a4b0fde Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 1 Dec 2011 00:05:31 +0100 Subject: PM / Domains: fix compilation failure for CONFIG_PM_GENERIC_DOMAINS unset Fix the following compalitaion breakage: In file included from linux/drivers/sh/pm_runtime.c:15: linux/include/linux/pm_domain.h: In function 'dev_to_genpd': linux/include/linux/pm_domain.h:142: error: implicit declaration of function 'ERR_PTR' linux/include/linux/pm_domain.h:142: warning: return makes pointer from integer without a cast In file included from linux/include/linux/sh_clk.h:10, from linux/drivers/sh/pm_runtime.c:19: linux/include/linux/err.h: At top level: linux/include/linux/err.h:22: error: conflicting types for 'ERR_PTR' linux/include/linux/pm_domain.h:142: note: previous implicit declaration of 'ERR_PTR' was here make[3]: *** [drivers/sh/pm_runtime.o] Error 1 Reported-by: Nobuhiro Iwamatsu Signed-off-by: Guennadi Liakhovetski Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index cc1a245..fbb81bc 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -10,6 +10,7 @@ #define _LINUX_PM_DOMAIN_H #include +#include enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ -- cgit v0.10.2 From 0118521cc7acb3ccbc1a01d6144ac32be9d56a4c Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 1 Dec 2011 22:32:43 +0100 Subject: PM / Hibernate: Enable usermodehelpers in software_resume() error path In the software_resume() function defined in kernel/power/hibernate.c, if the call to create_basic_memory_bitmaps() fails, the usermodehelpers are not enabled (which had been disabled in the previous step). Fix it. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index ebf62c3..1fcf9de 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -807,8 +807,10 @@ static int software_resume(void) goto close_finish; error = create_basic_memory_bitmaps(); - if (error) + if (error) { + usermodehelper_enable(); goto close_finish; + } pr_debug("PM: Preparing processes for restore.\n"); error = prepare_processes(); -- cgit v0.10.2 From 97819a26224f019e73d88bb2fd4eb5a614860461 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 1 Dec 2011 22:33:10 +0100 Subject: PM / Hibernate: Thaw processes in SNAPSHOT_CREATE_IMAGE ioctl test path Commit 2aede851ddf08666f68ffc17be446420e9d2a056 (PM / Hibernate: Freeze kernel threads after preallocating memory) moved the freezing of kernel threads to hibernation_snapshot() function. So now, if the call to hibernation_snapshot() returns early due to a successful hibernation test, the caller has to thaw processes to ensure that the system gets back to its original state. But in SNAPSHOT_CREATE_IMAGE hibernation ioctl, the caller does not thaw processes in case hibernation_snapshot() returned due to a successful freezer test. Fix this issue. But note we still send the value of 'in_suspend' (which is now 0) to userspace, because we are not in an error path per-se, and moreover, the value of in_suspend correctly depicts the situation here. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 1fcf9de..c10cb0f 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -55,7 +55,7 @@ enum { static int hibernation_mode = HIBERNATION_SHUTDOWN; -static bool freezer_test_done; +bool freezer_test_done; static const struct platform_hibernation_ops *hibernation_ops; diff --git a/kernel/power/power.h b/kernel/power/power.h index 23a2db1..0c4defe 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -50,6 +50,8 @@ static inline char *check_image_kernel(struct swsusp_info *info) #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) /* kernel/power/hibernate.c */ +extern bool freezer_test_done; + extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); extern int hibernation_platform_enter(void); diff --git a/kernel/power/user.c b/kernel/power/user.c index 6d8f535..e2aff0f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -283,10 +283,15 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, } pm_restore_gfp_mask(); error = hibernation_snapshot(data->platform_support); - if (!error) + if (!error) { error = put_user(in_suspend, (int __user *)arg); - if (!error) - data->ready = 1; + if (!error && !freezer_test_done) + data->ready = 1; + if (freezer_test_done) { + freezer_test_done = false; + thaw_processes(); + } + } break; case SNAPSHOT_ATOMIC_RESTORE: -- cgit v0.10.2 From 48580ab8729865c81e148d59159fbe2aa7865511 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 1 Dec 2011 22:33:20 +0100 Subject: PM / Hibernate: Remove deprecated hibernation test modes The hibernation test modes 'test' and 'testproc' are deprecated, because the 'pm_test' framework offers much more fine-grained control for debugging suspend and hibernation related problems. So, remove the deprecated 'test' and 'testproc' hibernation test modes. Suggested-by: Rafael J. Wysocki Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c10cb0f..5314a94 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -43,8 +43,6 @@ int in_suspend __nosavedata; enum { HIBERNATION_INVALID, HIBERNATION_PLATFORM, - HIBERNATION_TEST, - HIBERNATION_TESTPROC, HIBERNATION_SHUTDOWN, HIBERNATION_REBOOT, /* keep last */ @@ -96,15 +94,6 @@ static void hibernation_debug_sleep(void) mdelay(5000); } -static int hibernation_testmode(int mode) -{ - if (hibernation_mode == mode) { - hibernation_debug_sleep(); - return 1; - } - return 0; -} - static int hibernation_test(int level) { if (pm_test_level == level) { @@ -114,7 +103,6 @@ static int hibernation_test(int level) return 0; } #else /* !CONFIG_PM_DEBUG */ -static int hibernation_testmode(int mode) { return 0; } static int hibernation_test(int level) { return 0; } #endif /* !CONFIG_PM_DEBUG */ @@ -278,8 +266,7 @@ static int create_image(int platform_mode) goto Platform_finish; error = disable_nonboot_cpus(); - if (error || hibernation_test(TEST_CPUS) - || hibernation_testmode(HIBERNATION_TEST)) + if (error || hibernation_test(TEST_CPUS)) goto Enable_cpus; local_irq_disable(); @@ -349,8 +336,7 @@ int hibernation_snapshot(int platform_mode) if (error) goto Cleanup; - if (hibernation_test(TEST_FREEZER) || - hibernation_testmode(HIBERNATION_TESTPROC)) { + if (hibernation_test(TEST_FREEZER)) { /* * Indicate to the caller that we are returning due to a @@ -586,9 +572,6 @@ int hibernation_platform_enter(void) static void power_down(void) { switch (hibernation_mode) { - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: - break; case HIBERNATION_REBOOT: kernel_restart(NULL); break; @@ -853,8 +836,6 @@ static const char * const hibernation_modes[] = { [HIBERNATION_PLATFORM] = "platform", [HIBERNATION_SHUTDOWN] = "shutdown", [HIBERNATION_REBOOT] = "reboot", - [HIBERNATION_TEST] = "test", - [HIBERNATION_TESTPROC] = "testproc", }; /* @@ -863,17 +844,15 @@ static const char * const hibernation_modes[] = { * Hibernation can be handled in several ways. There are a few different ways * to put the system into the sleep state: using the platform driver (e.g. ACPI * or other hibernation_ops), powering it off or rebooting it (for testing - * mostly), or using one of the two available test modes. + * mostly). * * The sysfs file /sys/power/disk provides an interface for selecting the * hibernation mode to use. Reading from this file causes the available modes - * to be printed. There are 5 modes that can be supported: + * to be printed. There are 3 modes that can be supported: * * 'platform' * 'shutdown' * 'reboot' - * 'test' - * 'testproc' * * If a platform hibernation driver is in use, 'platform' will be supported * and will be used by default. Otherwise, 'shutdown' will be used by default. @@ -897,8 +876,6 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, switch (i) { case HIBERNATION_SHUTDOWN: case HIBERNATION_REBOOT: - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: break; case HIBERNATION_PLATFORM: if (hibernation_ops) @@ -939,8 +916,6 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, switch (mode) { case HIBERNATION_SHUTDOWN: case HIBERNATION_REBOOT: - case HIBERNATION_TEST: - case HIBERNATION_TESTPROC: hibernation_mode = mode; break; case HIBERNATION_PLATFORM: -- cgit v0.10.2 From 5a50a7c32d630d6cdb13d69afabb0cc81b2f379c Mon Sep 17 00:00:00 2001 From: Keng-Yu Lin Date: Fri, 2 Dec 2011 00:04:23 +0100 Subject: ACPI / PM: Do not save/restore NVS on Asus K54C/K54HR The models do not resume correctly without acpi_sleep=nonvs. Signed-off-by: Keng-Yu Lin Signed-off-by: Rafael J. Wysocki diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 6d9a3ab..0a7ed69 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -476,6 +476,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "VGN-FW520F"), }, }, + { + .callback = init_nvs_nosave, + .ident = "Asus K54C", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "K54C"), + }, + }, + { + .callback = init_nvs_nosave, + .ident = "Asus K54HR", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "K54HR"), + }, + }, {}, }; #endif /* CONFIG_SUSPEND */ -- cgit v0.10.2 From 0c6aebe31861c470c8cfbfdfdfd72d1369a6440b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 3 Dec 2011 00:23:43 +0100 Subject: PM / Sleep: Unify diagnostic messages from device suspend/resume Make pm_op() and pm_noirq_op() use the same helper function for running callbacks, which will cause them to use the same format of diagnostic messages. This also reduces the complexity and size of the code quite a bit. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 406f82c..b570189 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -164,8 +164,9 @@ static ktime_t initcall_debug_start(struct device *dev) ktime_t calltime = ktime_set(0, 0); if (initcall_debug) { - pr_info("calling %s+ @ %i\n", - dev_name(dev), task_pid_nr(current)); + pr_info("calling %s+ @ %i, parent: %s\n", + dev_name(dev), task_pid_nr(current), + dev->parent ? dev_name(dev->parent) : "none"); calltime = ktime_get(); } @@ -210,6 +211,24 @@ static void dpm_wait_for_children(struct device *dev, bool async) device_for_each_child(dev, &async, dpm_wait_fn); } +static int dpm_run_callback(struct device *dev, int (*cb)(struct device *)) +{ + ktime_t calltime; + int error; + + if (!cb) + return 0; + + calltime = initcall_debug_start(dev); + + error = cb(dev); + suspend_report_result(cb, error); + + initcall_debug_report(dev, calltime, error); + + return error; +} + /** * pm_op - Execute the PM operation appropriate for given PM event. * @dev: Device to handle. @@ -221,59 +240,36 @@ static int pm_op(struct device *dev, pm_message_t state) { int error = 0; - ktime_t calltime; - - calltime = initcall_debug_start(dev); switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: - if (ops->suspend) { - error = ops->suspend(dev); - suspend_report_result(ops->suspend, error); - } + error = dpm_run_callback(dev, ops->suspend); break; case PM_EVENT_RESUME: - if (ops->resume) { - error = ops->resume(dev); - suspend_report_result(ops->resume, error); - } + error = dpm_run_callback(dev, ops->resume); break; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: - if (ops->freeze) { - error = ops->freeze(dev); - suspend_report_result(ops->freeze, error); - } + error = dpm_run_callback(dev, ops->freeze); break; case PM_EVENT_HIBERNATE: - if (ops->poweroff) { - error = ops->poweroff(dev); - suspend_report_result(ops->poweroff, error); - } + error = dpm_run_callback(dev, ops->poweroff); break; case PM_EVENT_THAW: case PM_EVENT_RECOVER: - if (ops->thaw) { - error = ops->thaw(dev); - suspend_report_result(ops->thaw, error); - } + error = dpm_run_callback(dev, ops->thaw); break; case PM_EVENT_RESTORE: - if (ops->restore) { - error = ops->restore(dev); - suspend_report_result(ops->restore, error); - } + error = dpm_run_callback(dev, ops->restore); break; #endif /* CONFIG_HIBERNATE_CALLBACKS */ default: error = -EINVAL; } - initcall_debug_report(dev, calltime, error); - return error; } @@ -291,70 +287,36 @@ static int pm_noirq_op(struct device *dev, pm_message_t state) { int error = 0; - ktime_t calltime = ktime_set(0, 0), delta, rettime; - - if (initcall_debug) { - pr_info("calling %s+ @ %i, parent: %s\n", - dev_name(dev), task_pid_nr(current), - dev->parent ? dev_name(dev->parent) : "none"); - calltime = ktime_get(); - } switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: - if (ops->suspend_noirq) { - error = ops->suspend_noirq(dev); - suspend_report_result(ops->suspend_noirq, error); - } + error = dpm_run_callback(dev, ops->suspend_noirq); break; case PM_EVENT_RESUME: - if (ops->resume_noirq) { - error = ops->resume_noirq(dev); - suspend_report_result(ops->resume_noirq, error); - } + error = dpm_run_callback(dev, ops->resume_noirq); break; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: - if (ops->freeze_noirq) { - error = ops->freeze_noirq(dev); - suspend_report_result(ops->freeze_noirq, error); - } + error = dpm_run_callback(dev, ops->freeze_noirq); break; case PM_EVENT_HIBERNATE: - if (ops->poweroff_noirq) { - error = ops->poweroff_noirq(dev); - suspend_report_result(ops->poweroff_noirq, error); - } + error = dpm_run_callback(dev, ops->poweroff_noirq); break; case PM_EVENT_THAW: case PM_EVENT_RECOVER: - if (ops->thaw_noirq) { - error = ops->thaw_noirq(dev); - suspend_report_result(ops->thaw_noirq, error); - } + error = dpm_run_callback(dev, ops->thaw_noirq); break; case PM_EVENT_RESTORE: - if (ops->restore_noirq) { - error = ops->restore_noirq(dev); - suspend_report_result(ops->restore_noirq, error); - } + error = dpm_run_callback(dev, ops->restore_noirq); break; #endif /* CONFIG_HIBERNATE_CALLBACKS */ default: error = -EINVAL; } - if (initcall_debug) { - rettime = ktime_get(); - delta = ktime_sub(rettime, calltime); - printk("initcall %s_i+ returned %d after %Ld usecs\n", - dev_name(dev), error, - (unsigned long long)ktime_to_ns(delta) >> 10); - } - return error; } @@ -486,26 +448,6 @@ void dpm_resume_noirq(pm_message_t state) EXPORT_SYMBOL_GPL(dpm_resume_noirq); /** - * legacy_resume - Execute a legacy (bus or class) resume callback for device. - * @dev: Device to resume. - * @cb: Resume callback to execute. - */ -static int legacy_resume(struct device *dev, int (*cb)(struct device *dev)) -{ - int error; - ktime_t calltime; - - calltime = initcall_debug_start(dev); - - error = cb(dev); - suspend_report_result(cb, error); - - initcall_debug_report(dev, calltime, error); - - return error; -} - -/** * device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. @@ -553,7 +495,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) goto End; } else if (dev->class->resume) { pm_dev_dbg(dev, state, "legacy class "); - error = legacy_resume(dev, dev->class->resume); + error = dpm_run_callback(dev, dev->class->resume); goto End; } } @@ -564,7 +506,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) error = pm_op(dev, dev->bus->pm, state); } else if (dev->bus->resume) { pm_dev_dbg(dev, state, "legacy "); - error = legacy_resume(dev, dev->bus->resume); + error = dpm_run_callback(dev, dev->bus->resume); } } -- cgit v0.10.2 From d310310cbff18ec385c6ab4d58f33b100192a96a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 1 Dec 2011 22:44:39 +0100 Subject: Freezer / sunrpc / NFS: don't allow TASK_KILLABLE sleeps to block the freezer Allow the freezer to skip wait_on_bit_killable sleeps in the sunrpc layer. This should allow suspend and hibernate events to proceed, even when there are RPC's pending on the wire. Also, wrap the TASK_KILLABLE sleeps in NFS layer in freezer_do_not_count and freezer_count calls. This allows the freezer to skip tasks that are sleeping while looping on EJUKEBOX or NFS4ERR_DELAY sorts of errors. Signed-off-by: Jeff Layton Signed-off-by: Rafael J. Wysocki diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 50a15fa..bf3a57b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -77,7 +78,7 @@ int nfs_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - schedule(); + freezable_schedule(); return 0; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d4bc9ed9..9194395 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "iostat.h" #include "internal.h" @@ -32,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EJUKEBOX && res != -EKEYEXPIRED) break; - schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be2bbac..b28bb19 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -53,6 +53,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -241,7 +242,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) *timeout = NFS4_POLL_RETRY_MIN; if (*timeout > NFS4_POLL_RETRY_MAX) *timeout = NFS4_POLL_RETRY_MAX; - schedule_timeout_killable(*timeout); + freezable_schedule_timeout_killable(*timeout); if (fatal_signal_pending(current)) res = -ERESTARTSYS; *timeout <<= 1; @@ -3950,7 +3951,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 static unsigned long nfs4_set_lock_task_retry(unsigned long timeout) { - schedule_timeout_killable(timeout); + freezable_schedule_timeout_killable(timeout); timeout <<= 1; if (timeout > NFS4_LOCK_MAXTIMEOUT) return NFS4_LOCK_MAXTIMEOUT; diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f48125d..0c672588 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "internal.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -59,7 +60,7 @@ nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) res = rpc_call_sync(clnt, msg, flags); if (res != -EKEYEXPIRED) break; - schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); + freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); res = -ERESTARTSYS; } while (!fatal_signal_pending(current)); return res; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index c1ee283..30f06c2 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -105,6 +105,29 @@ static inline int freezer_should_skip(struct task_struct *p) } /* + * These macros are intended to be used whenever you want allow a task that's + * sleeping in TASK_UNINTERRUPTIBLE or TASK_KILLABLE state to be frozen. Note + * that neither return any clear indication of whether a freeze event happened + * while in this function. + */ + +/* Like schedule(), but should not block the freezer. */ +#define freezable_schedule() \ +({ \ + freezer_do_not_count(); \ + schedule(); \ + freezer_count(); \ +}) + +/* Like schedule_timeout_killable(), but should not block the freezer. */ +#define freezable_schedule_timeout_killable(timeout) \ +({ \ + freezer_do_not_count(); \ + schedule_timeout_killable(timeout); \ + freezer_count(); \ +}) + +/* * Freezer-friendly wrappers around wait_event_interruptible(), * wait_event_killable() and wait_event_interruptible_timeout(), originally * defined in @@ -163,6 +186,11 @@ static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} +#define freezable_schedule() schedule() + +#define freezable_schedule_timeout_killable(timeout) \ + schedule_timeout_killable(timeout) + #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d12ffa5..5317b93 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -231,7 +232,7 @@ static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) return -ERESTARTSYS; - schedule(); + freezable_schedule(); return 0; } -- cgit v0.10.2 From e5b16746f0f2d6883c226af52d90904ce0f7eee8 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Sat, 3 Dec 2011 00:20:30 +0100 Subject: PM / Hibernate: Replace unintuitive 'if' condition in kernel/power/user.c with 'else' In the snapshot_ioctl() function, under SNAPSHOT_FREEZE, the code below freeze_processes() is a bit unintuitive. Improve it by replacing the second 'if' condition with an 'else' clause. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/user.c b/kernel/power/user.c index c202e2e..06ea33d 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -259,7 +259,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = freeze_processes(); if (error) usermodehelper_enable(); - if (!error) + else data->frozen = 1; break; -- cgit v0.10.2 From e84b2c202771bbd538866207efcb1f7dbab8045b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Dec 2011 22:19:54 +0100 Subject: PM / Domains: Make it possible to assign names to generic PM domains Add a name member pointer to struct generic_pm_domain and use it in diagnostic messages regarding the domain power-off and power-on latencies. Update the ARM shmobile SH7372 code to assign names to the PM domains used by it. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index adf1765..8d9ea89 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -101,8 +101,8 @@ static int pd_power_down(struct generic_pm_domain *genpd) } if (!sh7372_pd->no_debug) - pr_debug("sh7372 power domain down 0x%08x -> PSTR = 0x%08x\n", - mask, __raw_readl(PSTR)); + pr_debug("%s: Power off, 0x%08x -> PSTR = 0x%08x\n", + genpd->name, mask, __raw_readl(PSTR)); return 0; } @@ -133,8 +133,8 @@ static int __pd_power_up(struct sh7372_pm_domain *sh7372_pd, bool do_resume) ret = -EIO; if (!sh7372_pd->no_debug) - pr_debug("sh7372 power domain up 0x%08x -> PSTR = 0x%08x\n", - mask, __raw_readl(PSTR)); + pr_debug("%s: Power on, 0x%08x -> PSTR = 0x%08x\n", + sh7372_pd->genpd.name, mask, __raw_readl(PSTR)); out: if (ret == 0 && sh7372_pd->resume && do_resume) @@ -233,18 +233,22 @@ void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd, } struct sh7372_pm_domain sh7372_a4lc = { + .genpd.name = "A4LC", .bit_shift = 1, }; struct sh7372_pm_domain sh7372_a4mp = { + .genpd.name = "A4MP", .bit_shift = 2, }; struct sh7372_pm_domain sh7372_d4 = { + .genpd.name = "D4", .bit_shift = 3, }; struct sh7372_pm_domain sh7372_a4r = { + .genpd.name = "A4R", .bit_shift = 5, .gov = &sh7372_always_on_gov, .suspend = sh7372_a4r_suspend, @@ -253,14 +257,17 @@ struct sh7372_pm_domain sh7372_a4r = { }; struct sh7372_pm_domain sh7372_a3rv = { + .genpd.name = "A3RV", .bit_shift = 6, }; struct sh7372_pm_domain sh7372_a3ri = { + .genpd.name = "A3RI", .bit_shift = 8, }; struct sh7372_pm_domain sh7372_a3sp = { + .genpd.name = "A3SP", .bit_shift = 11, .gov = &sh7372_always_on_gov, .no_debug = true, @@ -275,6 +282,7 @@ static void sh7372_a3sp_init(void) } struct sh7372_pm_domain sh7372_a3sg = { + .genpd.name = "A3SG", .bit_shift = 13, }; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 5a8d67d..ad6ba2e 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -209,8 +209,13 @@ int __pm_genpd_poweron(struct generic_pm_domain *genpd) goto err; elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns > genpd->power_on_latency_ns) + if (elapsed_ns > genpd->power_on_latency_ns) { genpd->power_on_latency_ns = elapsed_ns; + if (genpd->name) + pr_warning("%s: Power-on latency exceeded, " + "new value %lld ns\n", genpd->name, + elapsed_ns); + } } genpd_set_active(genpd); @@ -428,8 +433,13 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) } elapsed_ns = ktime_to_ns(ktime_sub(ktime_get(), time_start)); - if (elapsed_ns > genpd->power_off_latency_ns) + if (elapsed_ns > genpd->power_off_latency_ns) { genpd->power_off_latency_ns = elapsed_ns; + if (genpd->name) + pr_warning("%s: Power-off latency exceeded, " + "new value %lld ns\n", genpd->name, + elapsed_ns); + } } genpd->status = GPD_STATE_POWER_OFF; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index fbb81bc..fb809b9 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -50,6 +50,7 @@ struct generic_pm_domain { struct mutex lock; struct dev_power_governor *gov; struct work_struct power_off_work; + char *name; unsigned int in_progress; /* Number of devices being suspended now */ atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ -- cgit v0.10.2 From c9914854b4ca339e511d052ce3a1a441ef15b928 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Dec 2011 23:16:47 +0100 Subject: PM / Domains: Fix default system suspend/resume operations Commit d23b9b00cdde5c93b914a172cecd57d5625fcd04 (PM / Domains: Rework system suspend callback routines (v2)) broke the system suspend and resume handling by devices belonging to generic PM domains, because it used freeze/thaw callbacks instead of suspend/resume ones and didn't initialize device callbacks for system suspend/resume properly at all. Fix those problems. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index ad6ba2e..92e6a90 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1435,7 +1435,7 @@ static int pm_genpd_default_restore_state(struct device *dev) */ static int pm_genpd_default_suspend(struct device *dev) { - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze; + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend; return cb ? cb(dev) : pm_generic_suspend(dev); } @@ -1446,7 +1446,7 @@ static int pm_genpd_default_suspend(struct device *dev) */ static int pm_genpd_default_suspend_late(struct device *dev) { - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend_late; return cb ? cb(dev) : pm_generic_suspend_noirq(dev); } @@ -1457,7 +1457,7 @@ static int pm_genpd_default_suspend_late(struct device *dev) */ static int pm_genpd_default_resume_early(struct device *dev) { - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume_early; return cb ? cb(dev) : pm_generic_resume_noirq(dev); } @@ -1468,7 +1468,7 @@ static int pm_genpd_default_resume_early(struct device *dev) */ static int pm_genpd_default_resume(struct device *dev) { - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw; + int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume; return cb ? cb(dev) : pm_generic_resume(dev); } @@ -1563,10 +1563,10 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; - genpd->dev_ops.freeze = pm_genpd_default_suspend; - genpd->dev_ops.freeze_late = pm_genpd_default_suspend_late; - genpd->dev_ops.thaw_early = pm_genpd_default_resume_early; - genpd->dev_ops.thaw = pm_genpd_default_resume; + genpd->dev_ops.suspend = pm_genpd_default_suspend; + genpd->dev_ops.suspend_late = pm_genpd_default_suspend_late; + genpd->dev_ops.resume_early = pm_genpd_default_resume_early; + genpd->dev_ops.resume = pm_genpd_default_resume; genpd->dev_ops.freeze = pm_genpd_default_freeze; genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late; genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early; -- cgit v0.10.2 From 467de1fc67d1bd2954eaac7019c564f28fa2b6a5 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:17:51 +0100 Subject: PM / Freezer: Remove the "userspace only" constraint from freezer[_do_not]_count() At present, the functions freezer_count() and freezer_do_not_count() impose the restriction that they are effective only for userspace processes. However, now, these functions have found more utility than originally intended by the commit which introduced it: ba96a0c8 (freezer: fix vfork problem). And moreover, even the vfork issue actually does not need the above restriction in these functions. So, modify these functions to make them work even for kernel threads, so that they can be used at other places in the kernel, where the userspace restriction doesn't apply. Suggested-by: Oleg Nesterov Suggested-by: Tejun Heo Acked-by: Tejun Heo Reviewed-by: Oleg Nesterov Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 30f06c2..7bcfe73 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -67,33 +67,27 @@ static inline bool cgroup_freezing(struct task_struct *task) * appropriately in case the child has exited before the freezing of tasks is * complete. However, we don't want kernel threads to be frozen in unexpected * places, so we allow them to block freeze_processes() instead or to set - * PF_NOFREEZE if needed and PF_FREEZER_SKIP is only set for userland vfork - * parents. Fortunately, in the ____call_usermodehelper() case the parent won't - * really block freeze_processes(), since ____call_usermodehelper() (the child) - * does a little before exec/exit and it can't be frozen before waking up the - * parent. + * PF_NOFREEZE if needed. Fortunately, in the ____call_usermodehelper() case the + * parent won't really block freeze_processes(), since ____call_usermodehelper() + * (the child) does a little before exec/exit and it can't be frozen before + * waking up the parent. */ -/* - * If the current task is a user space one, tell the freezer not to count it as - * freezable. - */ + +/* Tell the freezer not to count the current task as freezable. */ static inline void freezer_do_not_count(void) { - if (current->mm) - current->flags |= PF_FREEZER_SKIP; + current->flags |= PF_FREEZER_SKIP; } /* - * If the current task is a user space one, tell the freezer to count it as - * freezable again and try to freeze it. + * Tell the freezer to count the current task as freezable again and try to + * freeze it. */ static inline void freezer_count(void) { - if (current->mm) { - current->flags &= ~PF_FREEZER_SKIP; - try_to_freeze(); - } + current->flags &= ~PF_FREEZER_SKIP; + try_to_freeze(); } /* -- cgit v0.10.2 From 33e638b9070ba5e8812836e20390da6a6af13900 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:18:12 +0100 Subject: PM / Sleep: Use the freezer_count() functions in [un]lock_system_sleep() APIs Now that freezer_count() and freezer_do_not_count() don't have the restriction that they are effective only when called by userspace processes, use them in lock_system_sleep() and unlock_system_sleep() instead of open-coding their parts. Signed-off-by: Srivatsa S. Bhat Acked-by: Tejun Heo Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 1f7fff4..906d62c 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef CONFIG_VT @@ -380,16 +381,14 @@ static inline void unlock_system_sleep(void) {} static inline void lock_system_sleep(void) { - /* simplified freezer_do_not_count() */ - current->flags |= PF_FREEZER_SKIP; + freezer_do_not_count(); mutex_lock(&pm_mutex); } static inline void unlock_system_sleep(void) { mutex_unlock(&pm_mutex); - /* simplified freezer_count() */ - current->flags &= ~PF_FREEZER_SKIP; + freezer_count(); } #endif -- cgit v0.10.2 From 9b6fc5dc879bc90f765db0e95eefcf123d0d06dd Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 6 Dec 2011 23:24:38 +0100 Subject: PM / Sleep: Make [un]lock_system_sleep() generic The [un]lock_system_sleep() APIs were originally introduced to mutually exclude memory hotplug and hibernation. Directly using mutex_lock(&pm_mutex) to achieve mutual exclusion with suspend or hibernation code can lead to freezing failures. However, the APIs [un]lock_system_sleep() can be safely used to achieve the same, without causing freezing failures. So, since it would be beneficial to modify all the existing users of mutex_lock(&pm_mutex) (in all parts of the kernel), so that they use these safe APIs intead, make these APIs generic by removing the restriction that they work only when CONFIG_HIBERNATE_CALLBACKS is set. Moreover, that restriction didn't buy us anything anyway. Suggested-by: Tejun Heo Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 906d62c..95040cc 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -332,6 +332,8 @@ static inline bool system_entering_hibernation(void) { return false; } #define PM_RESTORE_PREPARE 0x0005 /* Going to restore a saved image */ #define PM_POST_RESTORE 0x0006 /* Restore failed */ +extern struct mutex pm_mutex; + #ifdef CONFIG_PM_SLEEP void save_processor_state(void); void restore_processor_state(void); @@ -352,6 +354,19 @@ extern bool events_check_enabled; extern bool pm_wakeup_pending(void); extern bool pm_get_wakeup_count(unsigned int *count); extern bool pm_save_wakeup_count(unsigned int count); + +static inline void lock_system_sleep(void) +{ + freezer_do_not_count(); + mutex_lock(&pm_mutex); +} + +static inline void unlock_system_sleep(void) +{ + mutex_unlock(&pm_mutex); + freezer_count(); +} + #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) @@ -367,30 +382,11 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } -#endif /* !CONFIG_PM_SLEEP */ - -extern struct mutex pm_mutex; -#ifndef CONFIG_HIBERNATE_CALLBACKS static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} -#else - -/* Let some subsystems like memory hotadd exclude hibernation */ - -static inline void lock_system_sleep(void) -{ - freezer_do_not_count(); - mutex_lock(&pm_mutex); -} - -static inline void unlock_system_sleep(void) -{ - mutex_unlock(&pm_mutex); - freezer_count(); -} -#endif +#endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_ARCH_SAVE_PAGE_KEYS /* -- cgit v0.10.2 From bcda53faf5814c0c6025a0bd47108adfcbe9f199 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 7 Dec 2011 22:29:54 +0100 Subject: PM / Sleep: Replace mutex_[un]lock(&pm_mutex) with [un]lock_system_sleep() Using [un]lock_system_sleep() is safer than directly using mutex_[un]lock() on 'pm_mutex', since the latter could lead to freezing failures. Hence convert all the present users of mutex_[un]lock(&pm_mutex) to use these safe APIs instead. Suggested-by: Tejun Heo Signed-off-by: Srivatsa S. Bhat Reviewed-by: Simon Horman Signed-off-by: Rafael J. Wysocki diff --git a/kernel/kexec.c b/kernel/kexec.c index dc7bc08..090ee10 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1523,7 +1523,7 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { - mutex_lock(&pm_mutex); + lock_system_sleep(); pm_prepare_console(); error = freeze_processes(); if (error) { @@ -1576,7 +1576,7 @@ int kernel_kexec(void) thaw_processes(); Restore_console: pm_restore_console(); - mutex_unlock(&pm_mutex); + unlock_system_sleep(); } #endif diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 605149a..6d6d288 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -69,14 +69,14 @@ void hibernation_set_ops(const struct platform_hibernation_ops *ops) WARN_ON(1); return; } - mutex_lock(&pm_mutex); + lock_system_sleep(); hibernation_ops = ops; if (ops) hibernation_mode = HIBERNATION_PLATFORM; else if (hibernation_mode == HIBERNATION_PLATFORM) hibernation_mode = HIBERNATION_SHUTDOWN; - mutex_unlock(&pm_mutex); + unlock_system_sleep(); } static bool entering_platform_hibernation; @@ -597,7 +597,7 @@ int hibernate(void) { int error; - mutex_lock(&pm_mutex); + lock_system_sleep(); /* The snapshot device should not be opened while we're running */ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { error = -EBUSY; @@ -665,7 +665,7 @@ int hibernate(void) pm_restore_console(); atomic_inc(&snapshot_device_available); Unlock: - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return error; } @@ -893,7 +893,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, p = memchr(buf, '\n', n); len = p ? p - buf : n; - mutex_lock(&pm_mutex); + lock_system_sleep(); for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { if (len == strlen(hibernation_modes[i]) && !strncmp(buf, hibernation_modes[i], len)) { @@ -919,7 +919,7 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, if (!error) pr_debug("PM: Hibernation mode set to '%s'\n", hibernation_modes[mode]); - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return error ? error : n; } @@ -946,9 +946,9 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr, if (maj != MAJOR(res) || min != MINOR(res)) goto out; - mutex_lock(&pm_mutex); + lock_system_sleep(); swsusp_resume_device = res; - mutex_unlock(&pm_mutex); + unlock_system_sleep(); printk(KERN_INFO "PM: Starting manual resume from disk\n"); noresume = 0; software_resume(); diff --git a/kernel/power/main.c b/kernel/power/main.c index 7d36fb3..9824b41e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -116,7 +116,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, p = memchr(buf, '\n', n); len = p ? p - buf : n; - mutex_lock(&pm_mutex); + lock_system_sleep(); level = TEST_FIRST; for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) @@ -126,7 +126,7 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, break; } - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return error ? error : n; } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index d336b27..4fd51be 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -42,9 +42,9 @@ static const struct platform_suspend_ops *suspend_ops; */ void suspend_set_ops(const struct platform_suspend_ops *ops) { - mutex_lock(&pm_mutex); + lock_system_sleep(); suspend_ops = ops; - mutex_unlock(&pm_mutex); + unlock_system_sleep(); } EXPORT_SYMBOL_GPL(suspend_set_ops); diff --git a/kernel/power/user.c b/kernel/power/user.c index 06ea33d..98ade21 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -71,7 +71,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) struct snapshot_data *data; int error; - mutex_lock(&pm_mutex); + lock_system_sleep(); if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { error = -EBUSY; @@ -123,7 +123,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->platform_support = 0; Unlock: - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return error; } @@ -132,7 +132,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) { struct snapshot_data *data; - mutex_lock(&pm_mutex); + lock_system_sleep(); swsusp_free(); free_basic_memory_bitmaps(); @@ -146,7 +146,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) PM_POST_HIBERNATION : PM_POST_RESTORE); atomic_inc(&snapshot_device_available); - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return 0; } @@ -158,7 +158,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, ssize_t res; loff_t pg_offp = *offp & ~PAGE_MASK; - mutex_lock(&pm_mutex); + lock_system_sleep(); data = filp->private_data; if (!data->ready) { @@ -179,7 +179,7 @@ static ssize_t snapshot_read(struct file *filp, char __user *buf, *offp += res; Unlock: - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return res; } @@ -191,7 +191,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, ssize_t res; loff_t pg_offp = *offp & ~PAGE_MASK; - mutex_lock(&pm_mutex); + lock_system_sleep(); data = filp->private_data; @@ -208,7 +208,7 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, if (res > 0) *offp += res; unlock: - mutex_unlock(&pm_mutex); + unlock_system_sleep(); return res; } -- cgit v0.10.2 From cba3176e88fa134ece3ae1cf7e35dab9972d7853 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Wed, 7 Dec 2011 22:30:09 +0100 Subject: PM / Sleep: Recommend [un]lock_system_sleep() over using pm_mutex directly Update the documentation to explain the perils of directly using mutex_[un]lock(&pm_mutex) and recommend the usage of the safe APIs [un]lock_system_sleep() instead. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt index 3ab9fbd..6ccb68f 100644 --- a/Documentation/power/freezing-of-tasks.txt +++ b/Documentation/power/freezing-of-tasks.txt @@ -176,3 +176,28 @@ tasks, since it generally exists anyway. A driver must have all firmwares it may need in RAM before suspend() is called. If keeping them is not practical, for example due to their size, they must be requested early enough using the suspend notifier API described in notifiers.txt. + +VI. Are there any precautions to be taken to prevent freezing failures? + +Yes, there are. + +First of all, grabbing the 'pm_mutex' lock to mutually exclude a piece of code +from system-wide sleep such as suspend/hibernation is not encouraged. +If possible, that piece of code must instead hook onto the suspend/hibernation +notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code +(kernel/cpu.c) for an example. + +However, if that is not feasible, and grabbing 'pm_mutex' is deemed necessary, +it is strongly discouraged to directly call mutex_[un]lock(&pm_mutex) since +that could lead to freezing failures, because if the suspend/hibernate code +successfully acquired the 'pm_mutex' lock, and hence that other entity failed +to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE +state. As a consequence, the freezer would not be able to freeze that task, +leading to freezing failure. + +However, the [un]lock_system_sleep() APIs are safe to use in this scenario, +since they ask the freezer to skip freezing this task, since it is anyway +"frozen enough" as it is blocked on 'pm_mutex', which will be released +only after the entire suspend/hibernation sequence is complete. +So, to summarize, use [un]lock_system_sleep() instead of directly using +mutex_[un]lock(&pm_mutex). That would prevent freezing failures. -- cgit v0.10.2 From 925b44a273aa8c4c23c006c1228aacd538eead09 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 8 Dec 2011 23:27:28 +0100 Subject: PM / Domains: Provide an always on power domain governor Since systems are likely to have power domains that can't be turned off for various reasons at least temporarily while implementing power domain support provide a default governor which will always refuse to power off the domain, saving platforms having to implement their own. Since the code is so tiny don't bother with a Kconfig symbol for it. Signed-off-by: Mark Brown Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index da78540..51527ee 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -141,3 +141,16 @@ struct dev_power_governor simple_qos_governor = { .stop_ok = default_stop_ok, .power_down_ok = default_power_down_ok, }; + +static bool always_on_power_down_ok(struct dev_pm_domain *domain) +{ + return false; +} + +/** + * pm_genpd_gov_always_on - A governor implementing an always-on policy + */ +struct dev_power_governor pm_domain_always_on_gov = { + .power_down_ok = always_on_power_down_ok, + .stop_ok = default_stop_ok, +}; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index fb809b9..a03a0ad 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -140,6 +140,7 @@ extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern bool default_stop_ok(struct device *dev); +extern struct dev_power_governor pm_domain_always_on_gov; #else static inline struct generic_pm_domain *dev_to_genpd(struct device *dev) @@ -193,6 +194,7 @@ static inline bool default_stop_ok(struct device *dev) { return false; } +#define pm_domain_always_on_gov NULL #endif static inline int pm_genpd_remove_callbacks(struct device *dev) -- cgit v0.10.2 From a87dc8fdc250f6416b522a3eb302c8cf95c2317c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 8 Dec 2011 23:27:40 +0100 Subject: PM / shmobile: Use common always on power domain governor Saves a tiny amount of code. Signed-off-by: Mark Brown Acked-by: Magnus Damm Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 8d9ea89..f742c61 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -162,16 +162,6 @@ static bool pd_active_wakeup(struct device *dev) return active_wakeup ? active_wakeup(dev) : true; } -static bool sh7372_power_down_forbidden(struct dev_pm_domain *domain) -{ - return false; -} - -struct dev_power_governor sh7372_always_on_gov = { - .power_down_ok = sh7372_power_down_forbidden, - .stop_ok = default_stop_ok, -}; - static int sh7372_stop_dev(struct device *dev) { int (*stop)(struct device *dev); @@ -250,7 +240,7 @@ struct sh7372_pm_domain sh7372_d4 = { struct sh7372_pm_domain sh7372_a4r = { .genpd.name = "A4R", .bit_shift = 5, - .gov = &sh7372_always_on_gov, + .gov = &pm_domain_always_on_gov, .suspend = sh7372_a4r_suspend, .resume = sh7372_intcs_resume, .stay_on = true, @@ -269,7 +259,7 @@ struct sh7372_pm_domain sh7372_a3ri = { struct sh7372_pm_domain sh7372_a3sp = { .genpd.name = "A3SP", .bit_shift = 11, - .gov = &sh7372_always_on_gov, + .gov = &pm_domain_always_on_gov, .no_debug = true, }; -- cgit v0.10.2 From c656c30668b9408c46d6bfbd1eea472f39a3155d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 8 Dec 2011 23:27:48 +0100 Subject: ARM: S3C64XX: Implement basic power domain support The S3C64xx SoCs contain a set of gateable power domains which can be enabled and disabled at runtime in order to save power. Use the generic power domain code to implement support for these in software, enabling runtime control of most domains: - ETM (not supported in mainline). - Domain G: 3D acceleration (no mainline support). - Domain V: MFC (no mainline support). - Domain I: JPEG and camera interface (no mainline support). - Domain P: 2D acceleration, TV encoder and scaler (no mainline support) - Domain S: Security (no mainline support). - Domain F: LCD (driver already uses runtime PM), post processing and rotation (no mainline support). The IROM domain is marked as always enabled as we should arrange for it to be enabled when we suspend which will need a bit more work. Due to all the conditional device registration that the platform does wrap s3c_pm_init() with s3c64xx_pm_init() which actually puts the device into the power domain after the machines have registered, looking for platform data to tell if the device was registered. Since currently only Cragganmore actually sets up PM that is the only machine updated. Signed-off-by: Mark Brown Acked-by: Kukjin Kim Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-s3c64xx/Kconfig b/arch/arm/mach-s3c64xx/Kconfig index 5552e04..381586c 100644 --- a/arch/arm/mach-s3c64xx/Kconfig +++ b/arch/arm/mach-s3c64xx/Kconfig @@ -8,6 +8,7 @@ config PLAT_S3C64XX bool depends on ARCH_S3C64XX select SAMSUNG_WAKEMASK + select PM_GENERIC_DOMAINS default y help Base platform code for any Samsung S3C64XX device diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c index d04b654..707b9b2 100644 --- a/arch/arm/mach-s3c64xx/mach-crag6410.c +++ b/arch/arm/mach-s3c64xx/mach-crag6410.c @@ -704,7 +704,7 @@ static void __init crag6410_machine_init(void) regulator_has_full_constraints(); - s3c_pm_init(); + s3c64xx_pm_init(); } MACHINE_START(WLF_CRAGG_6410, "Wolfson Cragganmore 6410") diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c index b375cd5..7d3e81b 100644 --- a/arch/arm/mach-s3c64xx/pm.c +++ b/arch/arm/mach-s3c64xx/pm.c @@ -17,10 +17,12 @@ #include #include #include +#include #include #include +#include #include #include @@ -31,6 +33,148 @@ #include #include +struct s3c64xx_pm_domain { + char *const name; + u32 ena; + u32 pwr_stat; + struct generic_pm_domain pd; +}; + +static int s3c64xx_pd_off(struct generic_pm_domain *domain) +{ + struct s3c64xx_pm_domain *pd; + u32 val; + + pd = container_of(domain, struct s3c64xx_pm_domain, pd); + + val = __raw_readl(S3C64XX_NORMAL_CFG); + val &= ~(pd->ena); + __raw_writel(val, S3C64XX_NORMAL_CFG); + + return 0; +} + +static int s3c64xx_pd_on(struct generic_pm_domain *domain) +{ + struct s3c64xx_pm_domain *pd; + u32 val; + long retry = 1000000L; + + pd = container_of(domain, struct s3c64xx_pm_domain, pd); + + val = __raw_readl(S3C64XX_NORMAL_CFG); + val |= pd->ena; + __raw_writel(val, S3C64XX_NORMAL_CFG); + + /* Not all domains provide power status readback */ + if (pd->pwr_stat) { + do { + cpu_relax(); + if (__raw_readl(S3C64XX_BLK_PWR_STAT) & pd->pwr_stat) + break; + } while (retry--); + + if (!retry) { + pr_err("Failed to start domain %s\n", pd->name); + return -EBUSY; + } + } + + return 0; +} + +static struct s3c64xx_pm_domain s3c64xx_pm_irom = { + .name = "IROM", + .ena = S3C64XX_NORMALCFG_IROM_ON, + .pd = { + .power_off = s3c64xx_pd_off, + .power_on = s3c64xx_pd_on, + }, +}; + +static struct s3c64xx_pm_domain s3c64xx_pm_etm = { + .name = "ETM", + .ena = S3C64XX_NORMALCFG_DOMAIN_ETM_ON, + .pwr_stat = S3C64XX_BLKPWRSTAT_ETM, + .pd = { + .power_off = s3c64xx_pd_off, + .power_on = s3c64xx_pd_on, + }, +}; + +static struct s3c64xx_pm_domain s3c64xx_pm_s = { + .name = "S", + .ena = S3C64XX_NORMALCFG_DOMAIN_S_ON, + .pwr_stat = S3C64XX_BLKPWRSTAT_S, + .pd = { + .power_off = s3c64xx_pd_off, + .power_on = s3c64xx_pd_on, + }, +}; + +static struct s3c64xx_pm_domain s3c64xx_pm_f = { + .name = "F", + .ena = S3C64XX_NORMALCFG_DOMAIN_F_ON, + .pwr_stat = S3C64XX_BLKPWRSTAT_F, + .pd = { + .power_off = s3c64xx_pd_off, + .power_on = s3c64xx_pd_on, + }, +}; + +static struct s3c64xx_pm_domain s3c64xx_pm_p = { + .name = "P", + .ena = S3C64XX_NORMALCFG_DOMAIN_P_ON, + .pwr_stat = S3C64XX_BLKPWRSTAT_P, + .pd = { + .power_off = s3c64xx_pd_off, + .power_on = s3c64xx_pd_on, + }, +}; + +static struct s3c64xx_pm_domain s3c64xx_pm_i = { + .name = "I", + .ena = S3C64XX_NORMALCFG_DOMAIN_I_ON, + .pwr_stat = S3C64XX_BLKPWRSTAT_I, + .pd = { + .power_off = s3c64xx_pd_off, + .power_on = s3c64xx_pd_on, + }, +}; + +static struct s3c64xx_pm_domain s3c64xx_pm_g = { + .name = "G", + .ena = S3C64XX_NORMALCFG_DOMAIN_G_ON, + .pd = { + .power_off = s3c64xx_pd_off, + .power_on = s3c64xx_pd_on, + }, +}; + +static struct s3c64xx_pm_domain s3c64xx_pm_v = { + .name = "V", + .ena = S3C64XX_NORMALCFG_DOMAIN_V_ON, + .pwr_stat = S3C64XX_BLKPWRSTAT_V, + .pd = { + .power_off = s3c64xx_pd_off, + .power_on = s3c64xx_pd_on, + }, +}; + +static struct s3c64xx_pm_domain *s3c64xx_always_on_pm_domains[] = { + &s3c64xx_pm_irom, +}; + +static struct s3c64xx_pm_domain *s3c64xx_pm_domains[] = { + &s3c64xx_pm_etm, + &s3c64xx_pm_g, + &s3c64xx_pm_v, + &s3c64xx_pm_i, + &s3c64xx_pm_p, + &s3c64xx_pm_s, + &s3c64xx_pm_f, +}; + #ifdef CONFIG_S3C_PM_DEBUG_LED_SMDK void s3c_pm_debug_smdkled(u32 set, u32 clear) { @@ -89,6 +233,8 @@ static struct sleep_save misc_save[] = { SAVE_ITEM(S3C64XX_SDMA_SEL), SAVE_ITEM(S3C64XX_MODEM_MIFPCON), + + SAVE_ITEM(S3C64XX_NORMAL_CFG), }; void s3c_pm_configure_extint(void) @@ -179,7 +325,26 @@ static void s3c64xx_pm_prepare(void) __raw_writel(__raw_readl(S3C64XX_WAKEUP_STAT), S3C64XX_WAKEUP_STAT); } -static int s3c64xx_pm_init(void) +int __init s3c64xx_pm_init(void) +{ + int i; + + s3c_pm_init(); + + for (i = 0; i < ARRAY_SIZE(s3c64xx_always_on_pm_domains); i++) + pm_genpd_init(&s3c64xx_always_on_pm_domains[i]->pd, + &pm_domain_always_on_gov, false); + + for (i = 0; i < ARRAY_SIZE(s3c64xx_pm_domains); i++) + pm_genpd_init(&s3c64xx_pm_domains[i]->pd, NULL, false); + + if (dev_get_platdata(&s3c_device_fb.dev)) + pm_genpd_add_device(&s3c64xx_pm_f.pd, &s3c_device_fb.dev); + + return 0; +} + +static __init int s3c64xx_pm_initcall(void) { pm_cpu_prep = s3c64xx_pm_prepare; pm_cpu_sleep = s3c64xx_cpu_suspend; @@ -198,5 +363,12 @@ static int s3c64xx_pm_init(void) return 0; } +arch_initcall(s3c64xx_pm_initcall); + +static __init int s3c64xx_pm_late_initcall(void) +{ + pm_genpd_poweroff_unused(); -arch_initcall(s3c64xx_pm_init); + return 0; +} +late_initcall(s3c64xx_pm_late_initcall); diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h index dcf6870..a6bdee2 100644 --- a/arch/arm/plat-samsung/include/plat/pm.h +++ b/arch/arm/plat-samsung/include/plat/pm.h @@ -22,6 +22,7 @@ struct sys_device; #ifdef CONFIG_PM extern __init int s3c_pm_init(void); +extern __init int s3c64xx_pm_init(void); #else @@ -29,6 +30,11 @@ static inline int s3c_pm_init(void) { return 0; } + +static inline int s3c64xx_pm_init(void) +{ + return 0; +} #endif /* configuration for the IRQ mask over sleep */ -- cgit v0.10.2 From b298d289c79211508f11cb50749b0d1d54eb244a Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Fri, 9 Dec 2011 23:36:36 +0100 Subject: PM / Sleep: Fix freezer failures due to racy usermodehelper_is_disabled() Commit a144c6a (PM: Print a warning if firmware is requested when tasks are frozen) introduced usermodehelper_is_disabled() to warn and exit immediately if firmware is requested when usermodehelpers are disabled. However, it is racy. Consider the following scenario, currently used in drivers/base/firmware_class.c: ... if (usermodehelper_is_disabled()) goto out; /* Do actual work */ ... out: return err; Nothing prevents someone from disabling usermodehelpers just after the check in the 'if' condition, which means that it is quite possible to try doing the "actual work" with usermodehelpers disabled, leading to undesirable consequences. In particular, this race condition in _request_firmware() causes task freezing failures whenever suspend/hibernation is in progress because, it wrongly waits to get the firmware/microcode image from userspace when actually the usermodehelpers are disabled or userspace has been frozen. Some of the example scenarios that cause freezing failures due to this race are those that depend on userspace via request_firmware(), such as x86 microcode module initialization and microcode image reload. Previous discussions about this issue can be found at: http://thread.gmane.org/gmane.linux.kernel/1198291/focus=1200591 This patch adds proper synchronization to fix this issue. It is to be noted that this patchset fixes the freezing failures but doesn't remove the warnings. IOW, it does not attempt to add explicit synchronization to x86 microcode driver to avoid requesting microcode image at inopportune moments. Because, the warnings were introduced to highlight such cases, in the first place. And we need not silence the warnings, since we take care of the *real* problem (freezing failure) and hence, after that, the warnings are pretty harmless anyway. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 06ed6b4..d5585da 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -534,6 +534,8 @@ static int _request_firmware(const struct firmware **firmware_p, return 0; } + read_lock_usermodehelper(); + if (WARN_ON(usermodehelper_is_disabled())) { dev_err(device, "firmware: %s will not be loaded\n", name); retval = -EBUSY; @@ -572,6 +574,8 @@ static int _request_firmware(const struct firmware **firmware_p, fw_destroy_instance(fw_priv); out: + read_unlock_usermodehelper(); + if (retval) { release_firmware(firmware); *firmware_p = NULL; diff --git a/include/linux/kmod.h b/include/linux/kmod.h index b16f653..722f477 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -117,5 +117,7 @@ extern void usermodehelper_init(void); extern int usermodehelper_disable(void); extern void usermodehelper_enable(void); extern bool usermodehelper_is_disabled(void); +extern void read_lock_usermodehelper(void); +extern void read_unlock_usermodehelper(void); #endif /* __LINUX_KMOD_H__ */ diff --git a/kernel/kmod.c b/kernel/kmod.c index a4bea97..81b4a27 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -50,6 +51,7 @@ static struct workqueue_struct *khelper_wq; static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; static DEFINE_SPINLOCK(umh_sysctl_lock); +static DECLARE_RWSEM(umhelper_sem); #ifdef CONFIG_MODULES @@ -275,6 +277,7 @@ static void __call_usermodehelper(struct work_struct *work) * If set, call_usermodehelper_exec() will exit immediately returning -EBUSY * (used for preventing user land processes from being created after the user * land has been frozen during a system-wide hibernation or suspend operation). + * Should always be manipulated under umhelper_sem acquired for write. */ static int usermodehelper_disabled = 1; @@ -293,6 +296,18 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq); */ #define RUNNING_HELPERS_TIMEOUT (5 * HZ) +void read_lock_usermodehelper(void) +{ + down_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(read_lock_usermodehelper); + +void read_unlock_usermodehelper(void) +{ + up_read(&umhelper_sem); +} +EXPORT_SYMBOL_GPL(read_unlock_usermodehelper); + /** * usermodehelper_disable - prevent new helpers from being started */ @@ -300,8 +315,10 @@ int usermodehelper_disable(void) { long retval; + down_write(&umhelper_sem); usermodehelper_disabled = 1; - smp_mb(); + up_write(&umhelper_sem); + /* * From now on call_usermodehelper_exec() won't start any new * helpers, so it is sufficient if running_helpers turns out to @@ -314,7 +331,9 @@ int usermodehelper_disable(void) if (retval) return 0; + down_write(&umhelper_sem); usermodehelper_disabled = 0; + up_write(&umhelper_sem); return -EAGAIN; } @@ -323,7 +342,9 @@ int usermodehelper_disable(void) */ void usermodehelper_enable(void) { + down_write(&umhelper_sem); usermodehelper_disabled = 0; + up_write(&umhelper_sem); } /** -- cgit v0.10.2 From cf007e3526a785a95a738d5a8fba44f1f4fe33e0 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 8 Dec 2011 23:42:53 +0100 Subject: PM / Hibernate: Remove deprecated hibernation snapshot ioctls Several snapshot ioctls were marked for removal quite some time ago, since they were deprecated. Remove them. Suggested-by: Rafael J. Wysocki Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 3d84912..9f51fc4 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -85,17 +85,6 @@ Who: Robin Getz & Matt Mackall --------------------------- -What: Deprecated snapshot ioctls -When: 2.6.36 - -Why: The ioctls in kernel/power/user.c were marked as deprecated long time - ago. Now they notify users about that so that they need to replace - their userspace. After some more time, remove them completely. - -Who: Jiri Slaby - ---------------------------- - What: The ieee80211_regdom module parameter When: March 2010 / desktop catchup diff --git a/kernel/power/user.c b/kernel/power/user.c index 98ade21..78bdb44 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -30,28 +30,6 @@ #include "power.h" -/* - * NOTE: The SNAPSHOT_SET_SWAP_FILE and SNAPSHOT_PMOPS ioctls are obsolete and - * will be removed in the future. They are only preserved here for - * compatibility with existing userland utilities. - */ -#define SNAPSHOT_SET_SWAP_FILE _IOW(SNAPSHOT_IOC_MAGIC, 10, unsigned int) -#define SNAPSHOT_PMOPS _IOW(SNAPSHOT_IOC_MAGIC, 12, unsigned int) - -#define PMOPS_PREPARE 1 -#define PMOPS_ENTER 2 -#define PMOPS_FINISH 3 - -/* - * NOTE: The following ioctl definitions are wrong and have been replaced with - * correct ones. They are only preserved here for compatibility with existing - * userland utilities and will be removed in the future. - */ -#define SNAPSHOT_ATOMIC_SNAPSHOT _IOW(SNAPSHOT_IOC_MAGIC, 3, void *) -#define SNAPSHOT_SET_IMAGE_SIZE _IOW(SNAPSHOT_IOC_MAGIC, 6, unsigned long) -#define SNAPSHOT_AVAIL_SWAP _IOR(SNAPSHOT_IOC_MAGIC, 7, void *) -#define SNAPSHOT_GET_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 8, void *) - #define SNAPSHOT_MINOR 231 @@ -213,15 +191,6 @@ unlock: return res; } -static void snapshot_deprecated_ioctl(unsigned int cmd) -{ - if (printk_ratelimit()) - printk(KERN_NOTICE "%pf: ioctl '%.8x' is deprecated and will " - "be removed soon, update your suspend-to-disk " - "utilities\n", - __builtin_return_address(0), cmd); -} - static long snapshot_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -272,8 +241,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, data->frozen = 0; break; - case SNAPSHOT_ATOMIC_SNAPSHOT: - snapshot_deprecated_ioctl(cmd); case SNAPSHOT_CREATE_IMAGE: if (data->mode != O_RDONLY || !data->frozen || data->ready) { error = -EPERM; @@ -308,8 +275,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, data->ready = 0; break; - case SNAPSHOT_SET_IMAGE_SIZE: - snapshot_deprecated_ioctl(cmd); case SNAPSHOT_PREF_IMAGE_SIZE: image_size = arg; break; @@ -324,16 +289,12 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = put_user(size, (loff_t __user *)arg); break; - case SNAPSHOT_AVAIL_SWAP: - snapshot_deprecated_ioctl(cmd); case SNAPSHOT_AVAIL_SWAP_SIZE: size = count_swap_pages(data->swap, 1); size <<= PAGE_SHIFT; error = put_user(size, (loff_t __user *)arg); break; - case SNAPSHOT_GET_SWAP_PAGE: - snapshot_deprecated_ioctl(cmd); case SNAPSHOT_ALLOC_SWAP_PAGE: if (data->swap < 0 || data->swap >= MAX_SWAPFILES) { error = -ENODEV; @@ -356,27 +317,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, free_all_swap_pages(data->swap); break; - case SNAPSHOT_SET_SWAP_FILE: /* This ioctl is deprecated */ - snapshot_deprecated_ioctl(cmd); - if (!swsusp_swap_in_use()) { - /* - * User space encodes device types as two-byte values, - * so we need to recode them - */ - if (old_decode_dev(arg)) { - data->swap = swap_type_of(old_decode_dev(arg), - 0, NULL); - if (data->swap < 0) - error = -ENODEV; - } else { - data->swap = -1; - error = -EINVAL; - } - } else { - error = -EPERM; - } - break; - case SNAPSHOT_S2RAM: if (!data->frozen) { error = -EPERM; @@ -399,33 +339,6 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, error = hibernation_platform_enter(); break; - case SNAPSHOT_PMOPS: /* This ioctl is deprecated */ - snapshot_deprecated_ioctl(cmd); - error = -EINVAL; - - switch (arg) { - - case PMOPS_PREPARE: - data->platform_support = 1; - error = 0; - break; - - case PMOPS_ENTER: - if (data->platform_support) - error = hibernation_platform_enter(); - break; - - case PMOPS_FINISH: - if (data->platform_support) - error = 0; - break; - - default: - printk(KERN_ERR "SNAPSHOT_PMOPS: invalid argument %ld\n", arg); - - } - break; - case SNAPSHOT_SET_SWAP_AREA: if (swsusp_swap_in_use()) { error = -EPERM; -- cgit v0.10.2 From 8ca6d9bcc8d33c592c0855b4b1481bc723ac7e85 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 15 Dec 2011 20:59:23 +0100 Subject: PM / Sleep: Simplify generic system suspend callbacks The pm_runtime_suspended() check in __pm_generic_call() doesn't really help and may cause problems to happen, because in some cases the system suspend callbacks need to be called even if the given device has been suspended by runtime PM. For example, if the device generally supports remote wakeup and is not enabled to wake up the system from sleep, it should be prevented from generating wakeup signals during system suspend and that has to be done by the suspend callbacks that the pm_runtime_suspended() check prevents from being executed. Similarly, it may not be a good idea to unconditionally change the runtime PM status of the device to 'active' in __pm_generic_resume(), because the driver may want to leave the device in the 'suspended' state, depending on what happened to it before the system suspend and whether or not it is enabled to wake up the system. For the above reasons, remove the pm_runtime_suspended() check from __pm_generic_call() and remove the code changing the device's runtime PM status from __pm_generic_resume(). Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 265a0ee..1b878b955 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -97,16 +97,16 @@ int pm_generic_prepare(struct device *dev) * @event: PM transition of the system under way. * @bool: Whether or not this is the "noirq" stage. * - * If the device has not been suspended at run time, execute the - * suspend/freeze/poweroff/thaw callback provided by its driver, if defined, and - * return its error code. Otherwise, return zero. + * Execute the suspend/freeze/poweroff/thaw callback provided by the driver of + * @dev, if defined, and return its error code. Return 0 if the callback is + * not present. */ static int __pm_generic_call(struct device *dev, int event, bool noirq) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int (*callback)(struct device *); - if (!pm || pm_runtime_suspended(dev)) + if (!pm) return 0; switch (event) { @@ -217,14 +217,12 @@ EXPORT_SYMBOL_GPL(pm_generic_thaw); * @bool: Whether or not this is the "noirq" stage. * * Execute the resume/resotre callback provided by the @dev's driver, if - * defined. If it returns 0, change the device's runtime PM status to 'active'. - * Return the callback's error code. + * defined, and return its error code. Return 0 if the callback is not present. */ static int __pm_generic_resume(struct device *dev, int event, bool noirq) { const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int (*callback)(struct device *); - int ret; if (!pm) return 0; @@ -241,17 +239,7 @@ static int __pm_generic_resume(struct device *dev, int event, bool noirq) break; } - if (!callback) - return 0; - - ret = callback(dev); - if (!ret && !noirq && pm_runtime_enabled(dev)) { - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - } - - return ret; + return callback ? callback(dev) : 0; } /** -- cgit v0.10.2 From 1eac8111e0763853266a171ce11214da3a347a0a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 15 Dec 2011 20:59:30 +0100 Subject: PM / Sleep: Merge internal functions in generic_ops.c After the change that removed the code related to runtime PM from __pm_generic_call() and __pm_generic_resume() these two functions need not be separate any more, so merge them. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 1b878b955..5a5b154 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -97,7 +97,7 @@ int pm_generic_prepare(struct device *dev) * @event: PM transition of the system under way. * @bool: Whether or not this is the "noirq" stage. * - * Execute the suspend/freeze/poweroff/thaw callback provided by the driver of + * Execute the PM callback corresponding to @event provided by the driver of * @dev, if defined, and return its error code. Return 0 if the callback is * not present. */ @@ -119,9 +119,15 @@ static int __pm_generic_call(struct device *dev, int event, bool noirq) case PM_EVENT_HIBERNATE: callback = noirq ? pm->poweroff_noirq : pm->poweroff; break; + case PM_EVENT_RESUME: + callback = noirq ? pm->resume_noirq : pm->resume; + break; case PM_EVENT_THAW: callback = noirq ? pm->thaw_noirq : pm->thaw; break; + case PM_EVENT_RESTORE: + callback = noirq ? pm->restore_noirq : pm->restore; + break; default: callback = NULL; break; @@ -211,44 +217,12 @@ int pm_generic_thaw(struct device *dev) EXPORT_SYMBOL_GPL(pm_generic_thaw); /** - * __pm_generic_resume - Generic resume/restore callback for subsystems. - * @dev: Device to handle. - * @event: PM transition of the system under way. - * @bool: Whether or not this is the "noirq" stage. - * - * Execute the resume/resotre callback provided by the @dev's driver, if - * defined, and return its error code. Return 0 if the callback is not present. - */ -static int __pm_generic_resume(struct device *dev, int event, bool noirq) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int (*callback)(struct device *); - - if (!pm) - return 0; - - switch (event) { - case PM_EVENT_RESUME: - callback = noirq ? pm->resume_noirq : pm->resume; - break; - case PM_EVENT_RESTORE: - callback = noirq ? pm->restore_noirq : pm->restore; - break; - default: - callback = NULL; - break; - } - - return callback ? callback(dev) : 0; -} - -/** * pm_generic_resume_noirq - Generic resume_noirq callback for subsystems. * @dev: Device to resume. */ int pm_generic_resume_noirq(struct device *dev) { - return __pm_generic_resume(dev, PM_EVENT_RESUME, true); + return __pm_generic_call(dev, PM_EVENT_RESUME, true); } EXPORT_SYMBOL_GPL(pm_generic_resume_noirq); @@ -258,7 +232,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume_noirq); */ int pm_generic_resume(struct device *dev) { - return __pm_generic_resume(dev, PM_EVENT_RESUME, false); + return __pm_generic_call(dev, PM_EVENT_RESUME, false); } EXPORT_SYMBOL_GPL(pm_generic_resume); @@ -268,7 +242,7 @@ EXPORT_SYMBOL_GPL(pm_generic_resume); */ int pm_generic_restore_noirq(struct device *dev) { - return __pm_generic_resume(dev, PM_EVENT_RESTORE, true); + return __pm_generic_call(dev, PM_EVENT_RESTORE, true); } EXPORT_SYMBOL_GPL(pm_generic_restore_noirq); @@ -278,7 +252,7 @@ EXPORT_SYMBOL_GPL(pm_generic_restore_noirq); */ int pm_generic_restore(struct device *dev) { - return __pm_generic_resume(dev, PM_EVENT_RESTORE, false); + return __pm_generic_call(dev, PM_EVENT_RESTORE, false); } EXPORT_SYMBOL_GPL(pm_generic_restore); -- cgit v0.10.2 From 7b4050381127ae11fcfc74a106d715a5fbbf888a Mon Sep 17 00:00:00 2001 From: MyungJoo Ham Date: Thu, 14 Jul 2011 10:33:55 +0900 Subject: PM/Devfreq: Add Exynos4-bus device DVFS driver for Exynos4210/4212/4412. Exynos4-bus device devfreq driver add DVFS capability for Exynos4210/4212/4412-Bus (memory). The driver monitors PPMU counters of memory controllers and adjusts operating frequencies and voltages with OPP. For Exynos4210, vdd_int is controlled. For exynos4412/4212, vdd_mif and vdd_int are controlled. Dependency (CONFIG_EXYNOS_ASV): Exynos4 ASV driver has been posted in the mailing list; however, it si not yet upstreamed. Although the current revision of Exynos4 ASV patch does not contain "CONFIG_EXYNOS_ASV", we have added the symbol to hide the dependent from compilers for now. As soon as Exynos4 ASV drivers are merged, the #ifdef statement will be removed or the name will be changed. However, enabling ASV is essential in most Exynos4 chips to reduce the power consumption of Exynos4210 because without ASV, this Devfreq driver assumes the worst case scenario, which consumes more power. Signed-off-by: MyungJoo Ham Signed-off-by: Kyungmin Park --- Changes from v1 - Support 4212 and 4412 as well as 4210. diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 8f04910..464fa21 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -65,4 +65,17 @@ config DEVFREQ_GOV_USERSPACE comment "DEVFREQ Drivers" +config ARM_EXYNOS4_BUS_DEVFREQ + bool "ARM Exynos4210/4212/4412 Memory Bus DEVFREQ Driver" + depends on CPU_EXYNOS4210 || CPU_EXYNOS4212 || CPU_EXYNOS4412 + select ARCH_HAS_OPP + select DEVFREQ_GOV_SIMPLE_ONDEMAND + help + This adds the DEVFREQ driver for Exynos4210 memory bus (vdd_int) + and Exynos4212/4412 memory interface and bus (vdd_mif + vdd_int). + It reads PPMU counters of memory controllers and adjusts + the operating frequencies and voltages with OPP support. + To operate with optimal voltages, ASV support is required + (CONFIG_EXYNOS_ASV). + endif # PM_DEVFREQ diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index 4564a89..8c46423 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -3,3 +3,6 @@ obj-$(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND) += governor_simpleondemand.o obj-$(CONFIG_DEVFREQ_GOV_PERFORMANCE) += governor_performance.o obj-$(CONFIG_DEVFREQ_GOV_POWERSAVE) += governor_powersave.o obj-$(CONFIG_DEVFREQ_GOV_USERSPACE) += governor_userspace.o + +# DEVFREQ Drivers +obj-$(CONFIG_ARM_EXYNOS4_BUS_DEVFREQ) += exynos4_bus.o diff --git a/drivers/devfreq/exynos4_bus.c b/drivers/devfreq/exynos4_bus.c new file mode 100644 index 0000000..6460577 --- /dev/null +++ b/drivers/devfreq/exynos4_bus.c @@ -0,0 +1,1135 @@ +/* drivers/devfreq/exynos4210_memorybus.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * MyungJoo Ham + * + * EXYNOS4 - Memory/Bus clock frequency scaling support in DEVFREQ framework + * This version supports EXYNOS4210 only. This changes bus frequencies + * and vddint voltages. Exynos4412/4212 should be able to be supported + * with minor modifications. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Exynos4 ASV has been in the mailing list, but not upstreamed, yet. */ +#ifdef CONFIG_EXYNOS_ASV +extern unsigned int exynos_result_of_asv; +#endif + +#include + +#include + +#define MAX_SAFEVOLT 1200000 /* 1.2V */ + +enum exynos4_busf_type { + TYPE_BUSF_EXYNOS4210, + TYPE_BUSF_EXYNOS4x12, +}; + +/* Assume that the bus is saturated if the utilization is 40% */ +#define BUS_SATURATION_RATIO 40 + +enum ppmu_counter { + PPMU_PMNCNT0 = 0, + PPMU_PMCCNT1, + PPMU_PMNCNT2, + PPMU_PMNCNT3, + PPMU_PMNCNT_MAX, +}; +struct exynos4_ppmu { + void __iomem *hw_base; + unsigned int ccnt; + unsigned int event; + unsigned int count[PPMU_PMNCNT_MAX]; + bool ccnt_overflow; + bool count_overflow[PPMU_PMNCNT_MAX]; +}; + +enum busclk_level_idx { + LV_0 = 0, + LV_1, + LV_2, + LV_3, + LV_4, + _LV_END +}; +#define EX4210_LV_MAX LV_2 +#define EX4x12_LV_MAX LV_4 +#define EX4210_LV_NUM (LV_2 + 1) +#define EX4x12_LV_NUM (LV_4 + 1) + +struct busfreq_data { + enum exynos4_busf_type type; + struct device *dev; + struct devfreq *devfreq; + bool disabled; + struct regulator *vdd_int; + struct regulator *vdd_mif; /* Exynos4412/4212 only */ + struct opp *curr_opp; + struct exynos4_ppmu dmc[2]; + + struct notifier_block pm_notifier; + struct mutex lock; + + /* Dividers calculated at boot/probe-time */ + unsigned int dmc_divtable[_LV_END]; /* DMC0 */ + unsigned int top_divtable[_LV_END]; +}; + +struct bus_opp_table { + unsigned int idx; + unsigned long clk; + unsigned long volt; +}; + +/* 4210 controls clock of mif and voltage of int */ +static struct bus_opp_table exynos4210_busclk_table[] = { + {LV_0, 400000, 1150000}, + {LV_1, 267000, 1050000}, + {LV_2, 133000, 1025000}, + {0, 0, 0}, +}; + +/* + * MIF is the main control knob clock for exynox4x12 MIF/INT + * clock and voltage of both mif/int are controlled. + */ +static struct bus_opp_table exynos4x12_mifclk_table[] = { + {LV_0, 400000, 1100000}, + {LV_1, 267000, 1000000}, + {LV_2, 160000, 950000}, + {LV_3, 133000, 950000}, + {LV_4, 100000, 950000}, + {0, 0, 0}, +}; + +/* + * INT is not the control knob of 4x12. LV_x is not meant to represent + * the current performance. (MIF does) + */ +static struct bus_opp_table exynos4x12_intclk_table[] = { + {LV_0, 200000, 1000000}, + {LV_1, 160000, 950000}, + {LV_2, 133000, 925000}, + {LV_3, 100000, 900000}, + {0, 0, 0}, +}; + +/* TODO: asv volt definitions are "__initdata"? */ +/* Some chips have different operating voltages */ +static unsigned int exynos4210_asv_volt[][EX4210_LV_NUM] = { + {1150000, 1050000, 1050000}, + {1125000, 1025000, 1025000}, + {1100000, 1000000, 1000000}, + {1075000, 975000, 975000}, + {1050000, 950000, 950000}, +}; + +static unsigned int exynos4x12_mif_step_50[][EX4x12_LV_NUM] = { + /* 400 267 160 133 100 */ + {1050000, 950000, 900000, 900000, 900000}, /* ASV0 */ + {1050000, 950000, 900000, 900000, 900000}, /* ASV1 */ + {1050000, 950000, 900000, 900000, 900000}, /* ASV2 */ + {1050000, 900000, 900000, 900000, 900000}, /* ASV3 */ + {1050000, 900000, 900000, 900000, 850000}, /* ASV4 */ + {1050000, 900000, 900000, 850000, 850000}, /* ASV5 */ + {1050000, 900000, 850000, 850000, 850000}, /* ASV6 */ + {1050000, 900000, 850000, 850000, 850000}, /* ASV7 */ + {1050000, 900000, 850000, 850000, 850000}, /* ASV8 */ +}; + +static unsigned int exynos4x12_int_volt[][EX4x12_LV_NUM] = { + /* 200 160 133 100 */ + {1000000, 950000, 925000, 900000}, /* ASV0 */ + {975000, 925000, 925000, 900000}, /* ASV1 */ + {950000, 925000, 900000, 875000}, /* ASV2 */ + {950000, 900000, 900000, 875000}, /* ASV3 */ + {925000, 875000, 875000, 875000}, /* ASV4 */ + {900000, 850000, 850000, 850000}, /* ASV5 */ + {900000, 850000, 850000, 850000}, /* ASV6 */ + {900000, 850000, 850000, 850000}, /* ASV7 */ + {900000, 850000, 850000, 850000}, /* ASV8 */ +}; + +/*** Clock Divider Data for Exynos4210 ***/ +static unsigned int exynos4210_clkdiv_dmc0[][8] = { + /* + * Clock divider value for following + * { DIVACP, DIVACP_PCLK, DIVDPHY, DIVDMC, DIVDMCD + * DIVDMCP, DIVCOPY2, DIVCORE_TIMERS } + */ + + /* DMC L0: 400MHz */ + { 3, 1, 1, 1, 1, 1, 3, 1 }, + /* DMC L1: 266.7MHz */ + { 4, 1, 1, 2, 1, 1, 3, 1 }, + /* DMC L2: 133MHz */ + { 5, 1, 1, 5, 1, 1, 3, 1 }, +}; +static unsigned int exynos4210_clkdiv_top[][5] = { + /* + * Clock divider value for following + * { DIVACLK200, DIVACLK100, DIVACLK160, DIVACLK133, DIVONENAND } + */ + /* ACLK200 L0: 200MHz */ + { 3, 7, 4, 5, 1 }, + /* ACLK200 L1: 160MHz */ + { 4, 7, 5, 6, 1 }, + /* ACLK200 L2: 133MHz */ + { 5, 7, 7, 7, 1 }, +}; +static unsigned int exynos4210_clkdiv_lr_bus[][2] = { + /* + * Clock divider value for following + * { DIVGDL/R, DIVGPL/R } + */ + /* ACLK_GDL/R L1: 200MHz */ + { 3, 1 }, + /* ACLK_GDL/R L2: 160MHz */ + { 4, 1 }, + /* ACLK_GDL/R L3: 133MHz */ + { 5, 1 }, +}; + +/*** Clock Divider Data for Exynos4212/4412 ***/ +static unsigned int exynos4x12_clkdiv_dmc0[][6] = { + /* + * Clock divider value for following + * { DIVACP, DIVACP_PCLK, DIVDPHY, DIVDMC, DIVDMCD + * DIVDMCP} + */ + + /* DMC L0: 400MHz */ + {3, 1, 1, 1, 1, 1}, + /* DMC L1: 266.7MHz */ + {4, 1, 1, 2, 1, 1}, + /* DMC L2: 160MHz */ + {5, 1, 1, 4, 1, 1}, + /* DMC L3: 133MHz */ + {5, 1, 1, 5, 1, 1}, + /* DMC L4: 100MHz */ + {7, 1, 1, 7, 1, 1}, +}; +static unsigned int exynos4x12_clkdiv_dmc1[][6] = { + /* + * Clock divider value for following + * { G2DACP, DIVC2C, DIVC2C_ACLK } + */ + + /* DMC L0: 400MHz */ + {3, 1, 1}, + /* DMC L1: 266.7MHz */ + {4, 2, 1}, + /* DMC L2: 160MHz */ + {5, 4, 1}, + /* DMC L3: 133MHz */ + {5, 5, 1}, + /* DMC L4: 100MHz */ + {7, 7, 1}, +}; +static unsigned int exynos4x12_clkdiv_top[][5] = { + /* + * Clock divider value for following + * { DIVACLK266_GPS, DIVACLK100, DIVACLK160, + DIVACLK133, DIVONENAND } + */ + + /* ACLK_GDL/R L0: 200MHz */ + {2, 7, 4, 5, 1}, + /* ACLK_GDL/R L1: 200MHz */ + {2, 7, 4, 5, 1}, + /* ACLK_GDL/R L2: 160MHz */ + {4, 7, 5, 7, 1}, + /* ACLK_GDL/R L3: 133MHz */ + {4, 7, 5, 7, 1}, + /* ACLK_GDL/R L4: 100MHz */ + {7, 7, 7, 7, 1}, +}; +static unsigned int exynos4x12_clkdiv_lr_bus[][2] = { + /* + * Clock divider value for following + * { DIVGDL/R, DIVGPL/R } + */ + + /* ACLK_GDL/R L0: 200MHz */ + {3, 1}, + /* ACLK_GDL/R L1: 200MHz */ + {3, 1}, + /* ACLK_GDL/R L2: 160MHz */ + {4, 1}, + /* ACLK_GDL/R L3: 133MHz */ + {5, 1}, + /* ACLK_GDL/R L4: 100MHz */ + {7, 1}, +}; +static unsigned int exynos4x12_clkdiv_sclkip[][3] = { + /* + * Clock divider value for following + * { DIVMFC, DIVJPEG, DIVFIMC0~3} + */ + + /* SCLK_MFC: 200MHz */ + {3, 3, 4}, + /* SCLK_MFC: 200MHz */ + {3, 3, 4}, + /* SCLK_MFC: 160MHz */ + {4, 4, 5}, + /* SCLK_MFC: 133MHz */ + {5, 5, 5}, + /* SCLK_MFC: 100MHz */ + {7, 7, 7}, +}; + + +static int exynos4210_set_busclk(struct busfreq_data *data, struct opp *opp) +{ + unsigned int index; + unsigned int tmp; + + for (index = LV_0; index < EX4210_LV_NUM; index++) + if (opp_get_freq(opp) == exynos4210_busclk_table[index].clk) + break; + + if (index == EX4210_LV_NUM) + return -EINVAL; + + /* Change Divider - DMC0 */ + tmp = data->dmc_divtable[index]; + + __raw_writel(tmp, S5P_CLKDIV_DMC0); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_DMC0); + } while (tmp & 0x11111111); + + /* Change Divider - TOP */ + tmp = data->top_divtable[index]; + + __raw_writel(tmp, S5P_CLKDIV_TOP); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_TOP); + } while (tmp & 0x11111); + + /* Change Divider - LEFTBUS */ + tmp = __raw_readl(S5P_CLKDIV_LEFTBUS); + + tmp &= ~(S5P_CLKDIV_BUS_GDLR_MASK | S5P_CLKDIV_BUS_GPLR_MASK); + + tmp |= ((exynos4210_clkdiv_lr_bus[index][0] << + S5P_CLKDIV_BUS_GDLR_SHIFT) | + (exynos4210_clkdiv_lr_bus[index][1] << + S5P_CLKDIV_BUS_GPLR_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_LEFTBUS); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_LEFTBUS); + } while (tmp & 0x11); + + /* Change Divider - RIGHTBUS */ + tmp = __raw_readl(S5P_CLKDIV_RIGHTBUS); + + tmp &= ~(S5P_CLKDIV_BUS_GDLR_MASK | S5P_CLKDIV_BUS_GPLR_MASK); + + tmp |= ((exynos4210_clkdiv_lr_bus[index][0] << + S5P_CLKDIV_BUS_GDLR_SHIFT) | + (exynos4210_clkdiv_lr_bus[index][1] << + S5P_CLKDIV_BUS_GPLR_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_RIGHTBUS); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_RIGHTBUS); + } while (tmp & 0x11); + + return 0; +} + +static int exynos4x12_set_busclk(struct busfreq_data *data, struct opp *opp) +{ + unsigned int index; + unsigned int tmp; + + for (index = LV_0; index < EX4x12_LV_NUM; index++) + if (opp_get_freq(opp) == exynos4x12_mifclk_table[index].clk) + break; + + if (index == EX4x12_LV_NUM) + return -EINVAL; + + /* Change Divider - DMC0 */ + tmp = data->dmc_divtable[index]; + + __raw_writel(tmp, S5P_CLKDIV_DMC0); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_DMC0); + } while (tmp & 0x11111111); + + /* Change Divider - DMC1 */ + tmp = __raw_readl(S5P_CLKDIV_DMC1); + + tmp &= ~(S5P_CLKDIV_DMC1_G2D_ACP_MASK | + S5P_CLKDIV_DMC1_C2C_MASK | + S5P_CLKDIV_DMC1_C2CACLK_MASK); + + tmp |= ((exynos4x12_clkdiv_dmc1[index][0] << + S5P_CLKDIV_DMC1_G2D_ACP_SHIFT) | + (exynos4x12_clkdiv_dmc1[index][1] << + S5P_CLKDIV_DMC1_C2C_SHIFT) | + (exynos4x12_clkdiv_dmc1[index][2] << + S5P_CLKDIV_DMC1_C2CACLK_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_DMC1); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_DMC1); + } while (tmp & 0x111111); + + /* Change Divider - TOP */ + tmp = __raw_readl(S5P_CLKDIV_TOP); + + tmp &= ~(S5P_CLKDIV_TOP_ACLK266_GPS_MASK | + S5P_CLKDIV_TOP_ACLK100_MASK | + S5P_CLKDIV_TOP_ACLK160_MASK | + S5P_CLKDIV_TOP_ACLK133_MASK | + S5P_CLKDIV_TOP_ONENAND_MASK); + + tmp |= ((exynos4x12_clkdiv_top[index][0] << + S5P_CLKDIV_TOP_ACLK266_GPS_SHIFT) | + (exynos4x12_clkdiv_top[index][1] << + S5P_CLKDIV_TOP_ACLK100_SHIFT) | + (exynos4x12_clkdiv_top[index][2] << + S5P_CLKDIV_TOP_ACLK160_SHIFT) | + (exynos4x12_clkdiv_top[index][3] << + S5P_CLKDIV_TOP_ACLK133_SHIFT) | + (exynos4x12_clkdiv_top[index][4] << + S5P_CLKDIV_TOP_ONENAND_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_TOP); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_TOP); + } while (tmp & 0x11111); + + /* Change Divider - LEFTBUS */ + tmp = __raw_readl(S5P_CLKDIV_LEFTBUS); + + tmp &= ~(S5P_CLKDIV_BUS_GDLR_MASK | S5P_CLKDIV_BUS_GPLR_MASK); + + tmp |= ((exynos4x12_clkdiv_lr_bus[index][0] << + S5P_CLKDIV_BUS_GDLR_SHIFT) | + (exynos4x12_clkdiv_lr_bus[index][1] << + S5P_CLKDIV_BUS_GPLR_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_LEFTBUS); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_LEFTBUS); + } while (tmp & 0x11); + + /* Change Divider - RIGHTBUS */ + tmp = __raw_readl(S5P_CLKDIV_RIGHTBUS); + + tmp &= ~(S5P_CLKDIV_BUS_GDLR_MASK | S5P_CLKDIV_BUS_GPLR_MASK); + + tmp |= ((exynos4x12_clkdiv_lr_bus[index][0] << + S5P_CLKDIV_BUS_GDLR_SHIFT) | + (exynos4x12_clkdiv_lr_bus[index][1] << + S5P_CLKDIV_BUS_GPLR_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_RIGHTBUS); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_RIGHTBUS); + } while (tmp & 0x11); + + /* Change Divider - MFC */ + tmp = __raw_readl(S5P_CLKDIV_MFC); + + tmp &= ~(S5P_CLKDIV_MFC_MASK); + + tmp |= ((exynos4x12_clkdiv_sclkip[index][0] << + S5P_CLKDIV_MFC_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_MFC); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_MFC); + } while (tmp & 0x1); + + /* Change Divider - JPEG */ + tmp = __raw_readl(S5P_CLKDIV_CAM1); + + tmp &= ~(S5P_CLKDIV_CAM1_JPEG_MASK); + + tmp |= ((exynos4x12_clkdiv_sclkip[index][1] << + S5P_CLKDIV_CAM1_JPEG_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_CAM1); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_CAM1); + } while (tmp & 0x1); + + /* Change Divider - FIMC0~3 */ + tmp = __raw_readl(S5P_CLKDIV_CAM); + + tmp &= ~(S5P_CLKDIV_CAM_FIMC0_MASK | S5P_CLKDIV_CAM_FIMC1_MASK | + S5P_CLKDIV_CAM_FIMC2_MASK | S5P_CLKDIV_CAM_FIMC3_MASK); + + tmp |= ((exynos4x12_clkdiv_sclkip[index][2] << + S5P_CLKDIV_CAM_FIMC0_SHIFT) | + (exynos4x12_clkdiv_sclkip[index][2] << + S5P_CLKDIV_CAM_FIMC1_SHIFT) | + (exynos4x12_clkdiv_sclkip[index][2] << + S5P_CLKDIV_CAM_FIMC2_SHIFT) | + (exynos4x12_clkdiv_sclkip[index][2] << + S5P_CLKDIV_CAM_FIMC3_SHIFT)); + + __raw_writel(tmp, S5P_CLKDIV_CAM); + + do { + tmp = __raw_readl(S5P_CLKDIV_STAT_CAM1); + } while (tmp & 0x1111); + + return 0; +} + + +static void busfreq_mon_reset(struct busfreq_data *data) +{ + unsigned int i; + + for (i = 0; i < 2; i++) { + void __iomem *ppmu_base = data->dmc[i].hw_base; + + /* Reset PPMU */ + __raw_writel(0x8000000f, ppmu_base + 0xf010); + __raw_writel(0x8000000f, ppmu_base + 0xf050); + __raw_writel(0x6, ppmu_base + 0xf000); + __raw_writel(0x0, ppmu_base + 0xf100); + + /* Set PPMU Event */ + data->dmc[i].event = 0x6; + __raw_writel(((data->dmc[i].event << 12) | 0x1), + ppmu_base + 0xfc); + + /* Start PPMU */ + __raw_writel(0x1, ppmu_base + 0xf000); + } +} + +static void exynos4_read_ppmu(struct busfreq_data *data) +{ + int i, j; + + for (i = 0; i < 2; i++) { + void __iomem *ppmu_base = data->dmc[i].hw_base; + u32 overflow; + + /* Stop PPMU */ + __raw_writel(0x0, ppmu_base + 0xf000); + + /* Update local data from PPMU */ + overflow = __raw_readl(ppmu_base + 0xf050); + + data->dmc[i].ccnt = __raw_readl(ppmu_base + 0xf100); + data->dmc[i].ccnt_overflow = overflow & (1 << 31); + + for (j = 0; j < PPMU_PMNCNT_MAX; j++) { + data->dmc[i].count[j] = __raw_readl( + ppmu_base + (0xf110 + (0x10 * j))); + data->dmc[i].count_overflow[j] = overflow & (1 << j); + } + } + + busfreq_mon_reset(data); +} + +static int exynos4x12_get_intspec(unsigned long mifclk) +{ + int i = 0; + + while (exynos4x12_intclk_table[i].clk) { + if (exynos4x12_intclk_table[i].clk <= mifclk) + return i; + i++; + } + + return -EINVAL; +} + +static int exynos4_bus_setvolt(struct busfreq_data *data, struct opp *opp, + struct opp *oldopp) +{ + int err = 0, tmp; + unsigned long volt = opp_get_voltage(opp); + + switch (data->type) { + case TYPE_BUSF_EXYNOS4210: + /* OPP represents DMC clock + INT voltage */ + err = regulator_set_voltage(data->vdd_int, volt, + MAX_SAFEVOLT); + break; + case TYPE_BUSF_EXYNOS4x12: + /* OPP represents MIF clock + MIF voltage */ + err = regulator_set_voltage(data->vdd_mif, volt, + MAX_SAFEVOLT); + if (err) + break; + + tmp = exynos4x12_get_intspec(opp_get_freq(opp)); + if (tmp < 0) { + err = tmp; + regulator_set_voltage(data->vdd_mif, + opp_get_voltage(oldopp), + MAX_SAFEVOLT); + break; + } + err = regulator_set_voltage(data->vdd_int, + exynos4x12_intclk_table[tmp].volt, + MAX_SAFEVOLT); + /* Try to recover */ + if (err) + regulator_set_voltage(data->vdd_mif, + opp_get_voltage(oldopp), + MAX_SAFEVOLT); + break; + default: + err = -EINVAL; + } + + return err; +} + +static int exynos4_bus_target(struct device *dev, unsigned long *_freq) +{ + int err = 0; + struct platform_device *pdev = container_of(dev, struct platform_device, + dev); + struct busfreq_data *data = platform_get_drvdata(pdev); + struct opp *opp = devfreq_recommended_opp(dev, _freq); + unsigned long old_freq = opp_get_freq(data->curr_opp); + unsigned long freq = opp_get_freq(opp); + + if (old_freq == freq) + return 0; + + dev_dbg(dev, "targetting %lukHz %luuV\n", freq, opp_get_voltage(opp)); + + mutex_lock(&data->lock); + + if (data->disabled) + goto out; + + if (old_freq < freq) + err = exynos4_bus_setvolt(data, opp, data->curr_opp); + if (err) + goto out; + + if (old_freq != freq) { + switch (data->type) { + case TYPE_BUSF_EXYNOS4210: + err = exynos4210_set_busclk(data, opp); + break; + case TYPE_BUSF_EXYNOS4x12: + err = exynos4x12_set_busclk(data, opp); + break; + default: + err = -EINVAL; + } + } + if (err) + goto out; + + if (old_freq > freq) + err = exynos4_bus_setvolt(data, opp, data->curr_opp); + if (err) + goto out; + + data->curr_opp = opp; +out: + mutex_unlock(&data->lock); + return err; +} + +static int exynos4_get_busier_dmc(struct busfreq_data *data) +{ + u64 p0 = data->dmc[0].count[0]; + u64 p1 = data->dmc[1].count[0]; + + p0 *= data->dmc[1].ccnt; + p1 *= data->dmc[0].ccnt; + + if (data->dmc[1].ccnt == 0) + return 0; + + if (p0 > p1) + return 0; + return 1; +} + +static int exynos4_bus_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + struct platform_device *pdev = container_of(dev, struct platform_device, + dev); + struct busfreq_data *data = platform_get_drvdata(pdev); + int busier_dmc; + int cycles_x2 = 2; /* 2 x cycles */ + void __iomem *addr; + u32 timing; + u32 memctrl; + + exynos4_read_ppmu(data); + busier_dmc = exynos4_get_busier_dmc(data); + stat->current_frequency = opp_get_freq(data->curr_opp); + + if (busier_dmc) + addr = S5P_VA_DMC1; + else + addr = S5P_VA_DMC0; + + memctrl = __raw_readl(addr + 0x04); /* one of DDR2/3/LPDDR2 */ + timing = __raw_readl(addr + 0x38); /* CL or WL/RL values */ + + switch ((memctrl >> 8) & 0xf) { + case 0x4: /* DDR2 */ + cycles_x2 = ((timing >> 16) & 0xf) * 2; + break; + case 0x5: /* LPDDR2 */ + case 0x6: /* DDR3 */ + cycles_x2 = ((timing >> 8) & 0xf) + ((timing >> 0) & 0xf); + break; + default: + pr_err("%s: Unknown Memory Type(%d).\n", __func__, + (memctrl >> 8) & 0xf); + return -EINVAL; + } + + /* Number of cycles spent on memory access */ + stat->busy_time = data->dmc[busier_dmc].count[0] / 2 * (cycles_x2 + 2); + stat->busy_time *= 100 / BUS_SATURATION_RATIO; + stat->total_time = data->dmc[busier_dmc].ccnt; + + /* If the counters have overflown, retry */ + if (data->dmc[busier_dmc].ccnt_overflow || + data->dmc[busier_dmc].count_overflow[0]) + return -EAGAIN; + + return 0; +} + +static void exynos4_bus_exit(struct device *dev) +{ + struct platform_device *pdev = container_of(dev, struct platform_device, + dev); + struct busfreq_data *data = platform_get_drvdata(pdev); + + devfreq_unregister_opp_notifier(dev, data->devfreq); +} + +static struct devfreq_dev_profile exynos4_devfreq_profile = { + .initial_freq = 400000, + .polling_ms = 50, + .target = exynos4_bus_target, + .get_dev_status = exynos4_bus_get_dev_status, + .exit = exynos4_bus_exit, +}; + +static int exynos4210_init_tables(struct busfreq_data *data) +{ + u32 tmp; + int mgrp; + int i, err = 0; + + tmp = __raw_readl(S5P_CLKDIV_DMC0); + for (i = LV_0; i < EX4210_LV_NUM; i++) { + tmp &= ~(S5P_CLKDIV_DMC0_ACP_MASK | + S5P_CLKDIV_DMC0_ACPPCLK_MASK | + S5P_CLKDIV_DMC0_DPHY_MASK | + S5P_CLKDIV_DMC0_DMC_MASK | + S5P_CLKDIV_DMC0_DMCD_MASK | + S5P_CLKDIV_DMC0_DMCP_MASK | + S5P_CLKDIV_DMC0_COPY2_MASK | + S5P_CLKDIV_DMC0_CORETI_MASK); + + tmp |= ((exynos4210_clkdiv_dmc0[i][0] << + S5P_CLKDIV_DMC0_ACP_SHIFT) | + (exynos4210_clkdiv_dmc0[i][1] << + S5P_CLKDIV_DMC0_ACPPCLK_SHIFT) | + (exynos4210_clkdiv_dmc0[i][2] << + S5P_CLKDIV_DMC0_DPHY_SHIFT) | + (exynos4210_clkdiv_dmc0[i][3] << + S5P_CLKDIV_DMC0_DMC_SHIFT) | + (exynos4210_clkdiv_dmc0[i][4] << + S5P_CLKDIV_DMC0_DMCD_SHIFT) | + (exynos4210_clkdiv_dmc0[i][5] << + S5P_CLKDIV_DMC0_DMCP_SHIFT) | + (exynos4210_clkdiv_dmc0[i][6] << + S5P_CLKDIV_DMC0_COPY2_SHIFT) | + (exynos4210_clkdiv_dmc0[i][7] << + S5P_CLKDIV_DMC0_CORETI_SHIFT)); + + data->dmc_divtable[i] = tmp; + } + + tmp = __raw_readl(S5P_CLKDIV_TOP); + for (i = LV_0; i < EX4210_LV_NUM; i++) { + tmp &= ~(S5P_CLKDIV_TOP_ACLK200_MASK | + S5P_CLKDIV_TOP_ACLK100_MASK | + S5P_CLKDIV_TOP_ACLK160_MASK | + S5P_CLKDIV_TOP_ACLK133_MASK | + S5P_CLKDIV_TOP_ONENAND_MASK); + + tmp |= ((exynos4210_clkdiv_top[i][0] << + S5P_CLKDIV_TOP_ACLK200_SHIFT) | + (exynos4210_clkdiv_top[i][1] << + S5P_CLKDIV_TOP_ACLK100_SHIFT) | + (exynos4210_clkdiv_top[i][2] << + S5P_CLKDIV_TOP_ACLK160_SHIFT) | + (exynos4210_clkdiv_top[i][3] << + S5P_CLKDIV_TOP_ACLK133_SHIFT) | + (exynos4210_clkdiv_top[i][4] << + S5P_CLKDIV_TOP_ONENAND_SHIFT)); + + data->top_divtable[i] = tmp; + } + +#ifdef CONFIG_EXYNOS_ASV + tmp = exynos4_result_of_asv; +#else + tmp = 0; /* Max voltages for the reliability of the unknown */ +#endif + + pr_debug("ASV Group of Exynos4 is %d\n", tmp); + /* Use merged grouping for voltage */ + switch (tmp) { + case 0: + mgrp = 0; + break; + case 1: + case 2: + mgrp = 1; + break; + case 3: + case 4: + mgrp = 2; + break; + case 5: + case 6: + mgrp = 3; + break; + case 7: + mgrp = 4; + break; + default: + pr_warn("Unknown ASV Group. Use max voltage.\n"); + mgrp = 0; + } + + for (i = LV_0; i < EX4210_LV_NUM; i++) + exynos4210_busclk_table[i].volt = exynos4210_asv_volt[mgrp][i]; + + for (i = LV_0; i < EX4210_LV_NUM; i++) { + err = opp_add(data->dev, exynos4210_busclk_table[i].clk, + exynos4210_busclk_table[i].volt); + if (err) { + dev_err(data->dev, "Cannot add opp entries.\n"); + return err; + } + } + + + return 0; +} + +static int exynos4x12_init_tables(struct busfreq_data *data) +{ + unsigned int i; + unsigned int tmp; + int ret; + + /* Enable pause function for DREX2 DVFS */ + tmp = __raw_readl(S5P_DMC_PAUSE_CTRL); + tmp |= DMC_PAUSE_ENABLE; + __raw_writel(tmp, S5P_DMC_PAUSE_CTRL); + + tmp = __raw_readl(S5P_CLKDIV_DMC0); + + for (i = 0; i < EX4x12_LV_NUM; i++) { + tmp &= ~(S5P_CLKDIV_DMC0_ACP_MASK | + S5P_CLKDIV_DMC0_ACPPCLK_MASK | + S5P_CLKDIV_DMC0_DPHY_MASK | + S5P_CLKDIV_DMC0_DMC_MASK | + S5P_CLKDIV_DMC0_DMCD_MASK | + S5P_CLKDIV_DMC0_DMCP_MASK); + + tmp |= ((exynos4x12_clkdiv_dmc0[i][0] << + S5P_CLKDIV_DMC0_ACP_SHIFT) | + (exynos4x12_clkdiv_dmc0[i][1] << + S5P_CLKDIV_DMC0_ACPPCLK_SHIFT) | + (exynos4x12_clkdiv_dmc0[i][2] << + S5P_CLKDIV_DMC0_DPHY_SHIFT) | + (exynos4x12_clkdiv_dmc0[i][3] << + S5P_CLKDIV_DMC0_DMC_SHIFT) | + (exynos4x12_clkdiv_dmc0[i][4] << + S5P_CLKDIV_DMC0_DMCD_SHIFT) | + (exynos4x12_clkdiv_dmc0[i][5] << + S5P_CLKDIV_DMC0_DMCP_SHIFT)); + + data->dmc_divtable[i] = tmp; + } + +#ifdef CONFIG_EXYNOS_ASV + tmp = exynos4_result_of_asv; +#else + tmp = 0; /* Max voltages for the reliability of the unknown */ +#endif + + if (tmp > 8) + tmp = 0; + pr_debug("ASV Group of Exynos4x12 is %d\n", tmp); + + for (i = 0; i < EX4x12_LV_NUM; i++) { + exynos4x12_mifclk_table[i].volt = + exynos4x12_mif_step_50[tmp][i]; + exynos4x12_intclk_table[i].volt = + exynos4x12_int_volt[tmp][i]; + } + + for (i = 0; i < EX4x12_LV_NUM; i++) { + ret = opp_add(data->dev, exynos4x12_mifclk_table[i].clk, + exynos4x12_mifclk_table[i].volt); + if (ret) { + dev_err(data->dev, "Fail to add opp entries.\n"); + return ret; + } + } + + return 0; +} + +static int exynos4_busfreq_pm_notifier_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct busfreq_data *data = container_of(this, struct busfreq_data, + pm_notifier); + struct opp *opp; + unsigned long maxfreq = ULONG_MAX; + int err = 0; + + switch (event) { + case PM_SUSPEND_PREPARE: + /* Set Fastest and Deactivate DVFS */ + mutex_lock(&data->lock); + + data->disabled = true; + + opp = opp_find_freq_floor(data->dev, &maxfreq); + + err = exynos4_bus_setvolt(data, opp, data->curr_opp); + if (err) + goto unlock; + + switch (data->type) { + case TYPE_BUSF_EXYNOS4210: + err = exynos4210_set_busclk(data, opp); + break; + case TYPE_BUSF_EXYNOS4x12: + err = exynos4x12_set_busclk(data, opp); + break; + default: + err = -EINVAL; + } + if (err) + goto unlock; + + data->curr_opp = opp; +unlock: + mutex_unlock(&data->lock); + if (err) + return err; + return NOTIFY_OK; + case PM_POST_RESTORE: + case PM_POST_SUSPEND: + /* Reactivate */ + mutex_lock(&data->lock); + data->disabled = false; + mutex_unlock(&data->lock); + return NOTIFY_OK; + } + + return NOTIFY_DONE; +} + +static __devinit int exynos4_busfreq_probe(struct platform_device *pdev) +{ + struct busfreq_data *data; + struct opp *opp; + struct device *dev = &pdev->dev; + int err = 0; + + data = kzalloc(sizeof(struct busfreq_data), GFP_KERNEL); + if (data == NULL) { + dev_err(dev, "Cannot allocate memory.\n"); + return -ENOMEM; + } + + data->type = pdev->id_entry->driver_data; + data->dmc[0].hw_base = S5P_VA_DMC0; + data->dmc[1].hw_base = S5P_VA_DMC1; + data->pm_notifier.notifier_call = exynos4_busfreq_pm_notifier_event; + data->dev = dev; + mutex_init(&data->lock); + + switch (data->type) { + case TYPE_BUSF_EXYNOS4210: + err = exynos4210_init_tables(data); + break; + case TYPE_BUSF_EXYNOS4x12: + err = exynos4x12_init_tables(data); + break; + default: + dev_err(dev, "Cannot determine the device id %d\n", data->type); + err = -EINVAL; + } + if (err) + goto err_regulator; + + data->vdd_int = regulator_get(dev, "vdd_int"); + if (IS_ERR(data->vdd_int)) { + dev_err(dev, "Cannot get the regulator \"vdd_int\"\n"); + err = PTR_ERR(data->vdd_int); + goto err_regulator; + } + if (data->type == TYPE_BUSF_EXYNOS4x12) { + data->vdd_mif = regulator_get(dev, "vdd_mif"); + if (IS_ERR(data->vdd_mif)) { + dev_err(dev, "Cannot get the regulator \"vdd_mif\"\n"); + err = PTR_ERR(data->vdd_mif); + regulator_put(data->vdd_int); + goto err_regulator; + + } + } + + opp = opp_find_freq_floor(dev, &exynos4_devfreq_profile.initial_freq); + if (IS_ERR(opp)) { + dev_err(dev, "Invalid initial frequency %lu kHz.\n", + exynos4_devfreq_profile.initial_freq); + err = PTR_ERR(opp); + goto err_opp_add; + } + data->curr_opp = opp; + + platform_set_drvdata(pdev, data); + + busfreq_mon_reset(data); + + data->devfreq = devfreq_add_device(dev, &exynos4_devfreq_profile, + &devfreq_simple_ondemand, NULL); + if (IS_ERR(data->devfreq)) { + err = PTR_ERR(data->devfreq); + goto err_opp_add; + } + + devfreq_register_opp_notifier(dev, data->devfreq); + + err = register_pm_notifier(&data->pm_notifier); + if (err) { + dev_err(dev, "Failed to setup pm notifier\n"); + goto err_devfreq_add; + } + + return 0; +err_devfreq_add: + devfreq_remove_device(data->devfreq); +err_opp_add: + if (data->vdd_mif) + regulator_put(data->vdd_mif); + regulator_put(data->vdd_int); +err_regulator: + kfree(data); + return err; +} + +static __devexit int exynos4_busfreq_remove(struct platform_device *pdev) +{ + struct busfreq_data *data = platform_get_drvdata(pdev); + + unregister_pm_notifier(&data->pm_notifier); + devfreq_remove_device(data->devfreq); + regulator_put(data->vdd_int); + if (data->vdd_mif) + regulator_put(data->vdd_mif); + kfree(data); + + return 0; +} + +static int exynos4_busfreq_resume(struct device *dev) +{ + struct platform_device *pdev = container_of(dev, struct platform_device, + dev); + struct busfreq_data *data = platform_get_drvdata(pdev); + + busfreq_mon_reset(data); + return 0; +} + +static const struct dev_pm_ops exynos4_busfreq_pm = { + .resume = exynos4_busfreq_resume, +}; + +static const struct platform_device_id exynos4_busfreq_id[] = { + { "exynos4210-busfreq", TYPE_BUSF_EXYNOS4210 }, + { "exynos4412-busfreq", TYPE_BUSF_EXYNOS4x12 }, + { "exynos4212-busfreq", TYPE_BUSF_EXYNOS4x12 }, + { }, +}; + +static struct platform_driver exynos4_busfreq_driver = { + .probe = exynos4_busfreq_probe, + .remove = __devexit_p(exynos4_busfreq_remove), + .id_table = exynos4_busfreq_id, + .driver = { + .name = "exynos4-busfreq", + .owner = THIS_MODULE, + .pm = &exynos4_busfreq_pm, + }, +}; + +static int __init exynos4_busfreq_init(void) +{ + return platform_driver_register(&exynos4_busfreq_driver); +} +late_initcall(exynos4_busfreq_init); + +static void __exit exynos4_busfreq_exit(void) +{ + platform_driver_unregister(&exynos4_busfreq_driver); +} +module_exit(exynos4_busfreq_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("EXYNOS4 busfreq driver with devfreq framework"); +MODULE_AUTHOR("MyungJoo Ham "); +MODULE_ALIAS("exynos4-busfreq"); -- cgit v0.10.2 From 9cf519d1c15fa05a538c2b3963c5f3903daf765a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:01 +0100 Subject: PM / Sleep: Make pm_op() and pm_noirq_op() return callback pointers Make the pm_op() and pm_noirq_op() functions return pointers to appropriate callbacks instead of executing those callbacks and returning their results. This change is required for a subsequent modification that will execute the corresponding driver callback if the subsystem callback returned by either pm_op(), or pm_noirq_op() is NULL. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index b570189..b5cef7e 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -32,6 +32,8 @@ #include "../base.h" #include "power.h" +typedef int (*pm_callback_t)(struct device *); + /* * The entries in the dpm_list list are in a depth first order, simply * because children are guaranteed to be discovered after parents, and @@ -211,113 +213,70 @@ static void dpm_wait_for_children(struct device *dev, bool async) device_for_each_child(dev, &async, dpm_wait_fn); } -static int dpm_run_callback(struct device *dev, int (*cb)(struct device *)) -{ - ktime_t calltime; - int error; - - if (!cb) - return 0; - - calltime = initcall_debug_start(dev); - - error = cb(dev); - suspend_report_result(cb, error); - - initcall_debug_report(dev, calltime, error); - - return error; -} - /** - * pm_op - Execute the PM operation appropriate for given PM event. - * @dev: Device to handle. + * pm_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. */ -static int pm_op(struct device *dev, - const struct dev_pm_ops *ops, - pm_message_t state) +static pm_callback_t pm_op(const struct dev_pm_ops *ops, pm_message_t state) { - int error = 0; - switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: - error = dpm_run_callback(dev, ops->suspend); - break; + return ops->suspend; case PM_EVENT_RESUME: - error = dpm_run_callback(dev, ops->resume); - break; + return ops->resume; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: - error = dpm_run_callback(dev, ops->freeze); - break; + return ops->freeze; case PM_EVENT_HIBERNATE: - error = dpm_run_callback(dev, ops->poweroff); - break; + return ops->poweroff; case PM_EVENT_THAW: case PM_EVENT_RECOVER: - error = dpm_run_callback(dev, ops->thaw); + return ops->thaw; break; case PM_EVENT_RESTORE: - error = dpm_run_callback(dev, ops->restore); - break; + return ops->restore; #endif /* CONFIG_HIBERNATE_CALLBACKS */ - default: - error = -EINVAL; } - return error; + return NULL; } /** - * pm_noirq_op - Execute the PM operation appropriate for given PM event. - * @dev: Device to handle. + * pm_noirq_op - Return the PM operation appropriate for given PM event. * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. * * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int pm_noirq_op(struct device *dev, - const struct dev_pm_ops *ops, - pm_message_t state) +static pm_callback_t pm_noirq_op(const struct dev_pm_ops *ops, pm_message_t state) { - int error = 0; - switch (state.event) { #ifdef CONFIG_SUSPEND case PM_EVENT_SUSPEND: - error = dpm_run_callback(dev, ops->suspend_noirq); - break; + return ops->suspend_noirq; case PM_EVENT_RESUME: - error = dpm_run_callback(dev, ops->resume_noirq); - break; + return ops->resume_noirq; #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS case PM_EVENT_FREEZE: case PM_EVENT_QUIESCE: - error = dpm_run_callback(dev, ops->freeze_noirq); - break; + return ops->freeze_noirq; case PM_EVENT_HIBERNATE: - error = dpm_run_callback(dev, ops->poweroff_noirq); - break; + return ops->poweroff_noirq; case PM_EVENT_THAW: case PM_EVENT_RECOVER: - error = dpm_run_callback(dev, ops->thaw_noirq); - break; + return ops->thaw_noirq; case PM_EVENT_RESTORE: - error = dpm_run_callback(dev, ops->restore_noirq); - break; + return ops->restore_noirq; #endif /* CONFIG_HIBERNATE_CALLBACKS */ - default: - error = -EINVAL; } - return error; + return NULL; } static char *pm_verb(int event) @@ -375,6 +334,26 @@ static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info) usecs / USEC_PER_MSEC, usecs % USEC_PER_MSEC); } +static int dpm_run_callback(pm_callback_t cb, struct device *dev, + pm_message_t state, char *info) +{ + ktime_t calltime; + int error; + + if (!cb) + return 0; + + calltime = initcall_debug_start(dev); + + pm_dev_dbg(dev, state, info); + error = cb(dev); + suspend_report_result(cb, error); + + initcall_debug_report(dev, calltime, error); + + return error; +} + /*------------------------- Resume routines -------------------------*/ /** @@ -387,25 +366,29 @@ static void dpm_show_time(ktime_t starttime, pm_message_t state, char *info) */ static int device_resume_noirq(struct device *dev, pm_message_t state) { + pm_callback_t callback = NULL; + char *info = NULL; int error = 0; TRACE_DEVICE(dev); TRACE_RESUME(0); if (dev->pm_domain) { - pm_dev_dbg(dev, state, "EARLY power domain "); - error = pm_noirq_op(dev, &dev->pm_domain->ops, state); + info = "EARLY power domain "; + callback = pm_noirq_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { - pm_dev_dbg(dev, state, "EARLY type "); - error = pm_noirq_op(dev, dev->type->pm, state); + info = "EARLY type "; + callback = pm_noirq_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { - pm_dev_dbg(dev, state, "EARLY class "); - error = pm_noirq_op(dev, dev->class->pm, state); + info = "EARLY class "; + callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { - pm_dev_dbg(dev, state, "EARLY "); - error = pm_noirq_op(dev, dev->bus->pm, state); + info = "EARLY "; + callback = pm_noirq_op(dev->bus->pm, state); } + error = dpm_run_callback(callback, dev, state, info); + TRACE_RESUME(error); return error; } @@ -455,6 +438,8 @@ EXPORT_SYMBOL_GPL(dpm_resume_noirq); */ static int device_resume(struct device *dev, pm_message_t state, bool async) { + pm_callback_t callback = NULL; + char *info = NULL; int error = 0; bool put = false; @@ -477,40 +462,41 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) put = true; if (dev->pm_domain) { - pm_dev_dbg(dev, state, "power domain "); - error = pm_op(dev, &dev->pm_domain->ops, state); + info = "power domain "; + callback = pm_op(&dev->pm_domain->ops, state); goto End; } if (dev->type && dev->type->pm) { - pm_dev_dbg(dev, state, "type "); - error = pm_op(dev, dev->type->pm, state); + info = "type "; + callback = pm_op(dev->type->pm, state); goto End; } if (dev->class) { if (dev->class->pm) { - pm_dev_dbg(dev, state, "class "); - error = pm_op(dev, dev->class->pm, state); + info = "class "; + callback = pm_op(dev->class->pm, state); goto End; } else if (dev->class->resume) { - pm_dev_dbg(dev, state, "legacy class "); - error = dpm_run_callback(dev, dev->class->resume); + info = "legacy class "; + callback = dev->class->resume; goto End; } } if (dev->bus) { if (dev->bus->pm) { - pm_dev_dbg(dev, state, ""); - error = pm_op(dev, dev->bus->pm, state); + info = ""; + callback = pm_op(dev->bus->pm, state); } else if (dev->bus->resume) { - pm_dev_dbg(dev, state, "legacy "); - error = dpm_run_callback(dev, dev->bus->resume); + info = "legacy "; + callback = dev->bus->resume; } } End: + error = dpm_run_callback(callback, dev, state, info); dev->power.is_suspended = false; Unlock: @@ -705,23 +691,24 @@ static pm_message_t resume_event(pm_message_t sleep_state) */ static int device_suspend_noirq(struct device *dev, pm_message_t state) { - int error = 0; + pm_callback_t callback = NULL; + char *info = NULL; if (dev->pm_domain) { - pm_dev_dbg(dev, state, "LATE power domain "); - error = pm_noirq_op(dev, &dev->pm_domain->ops, state); + info = "LATE power domain "; + callback = pm_noirq_op(&dev->pm_domain->ops, state); } else if (dev->type && dev->type->pm) { - pm_dev_dbg(dev, state, "LATE type "); - error = pm_noirq_op(dev, dev->type->pm, state); + info = "LATE type "; + callback = pm_noirq_op(dev->type->pm, state); } else if (dev->class && dev->class->pm) { - pm_dev_dbg(dev, state, "LATE class "); - error = pm_noirq_op(dev, dev->class->pm, state); + info = "LATE class "; + callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { - pm_dev_dbg(dev, state, "LATE "); - error = pm_noirq_op(dev, dev->bus->pm, state); + info = "LATE "; + callback = pm_noirq_op(dev->bus->pm, state); } - return error; + return dpm_run_callback(callback, dev, state, info); } /** @@ -798,6 +785,8 @@ static int legacy_suspend(struct device *dev, pm_message_t state, */ static int __device_suspend(struct device *dev, pm_message_t state, bool async) { + pm_callback_t callback = NULL; + char *info = NULL; int error = 0; dpm_wait_for_children(dev, async); @@ -818,22 +807,22 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) device_lock(dev); if (dev->pm_domain) { - pm_dev_dbg(dev, state, "power domain "); - error = pm_op(dev, &dev->pm_domain->ops, state); - goto End; + info = "power domain "; + callback = pm_op(&dev->pm_domain->ops, state); + goto Run; } if (dev->type && dev->type->pm) { - pm_dev_dbg(dev, state, "type "); - error = pm_op(dev, dev->type->pm, state); - goto End; + info = "type "; + callback = pm_op(dev->type->pm, state); + goto Run; } if (dev->class) { if (dev->class->pm) { - pm_dev_dbg(dev, state, "class "); - error = pm_op(dev, dev->class->pm, state); - goto End; + info = "class "; + callback = pm_op(dev->class->pm, state); + goto Run; } else if (dev->class->suspend) { pm_dev_dbg(dev, state, "legacy class "); error = legacy_suspend(dev, state, dev->class->suspend); @@ -843,14 +832,18 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->bus) { if (dev->bus->pm) { - pm_dev_dbg(dev, state, ""); - error = pm_op(dev, dev->bus->pm, state); + info = ""; + callback = pm_op(dev->bus->pm, state); } else if (dev->bus->suspend) { pm_dev_dbg(dev, state, "legacy "); error = legacy_suspend(dev, state, dev->bus->suspend); + goto End; } } + Run: + error = dpm_run_callback(callback, dev, state, info); + End: if (!error) { dev->power.is_suspended = true; -- cgit v0.10.2 From 35cd133c6130c1eb52806808abee9d62e6854a27 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:13 +0100 Subject: PM: Run the driver callback directly if the subsystem one is not there Make the PM core execute driver PM callbacks directly if the corresponding subsystem callbacks are not present. There are three reasons for doing that. First, it reflects the behavior of drivers/base/dd.c:really_probe() that runs the driver's .probe() callback directly if the bus type's one is not defined, so this change will remove one arbitrary difference between the PM core and the remaining parts of the driver core. Second, it will allow some subsystems, whose PM callbacks don't do anything except for executing driver callbacks, to be simplified quite a bit by removing those "forward-only" callbacks. Finally, it will allow us to remove one level of indirection in the system suspend and resume code paths where it is not necessary, which is going to lead to less debug noise with initcall_debug passed in the kernel command line (messages won't be printed for driverless devices whose subsystems don't provide PM callbacks among other things). Signed-off-by: Rafael J. Wysocki diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 3139fb5..20af7de 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -126,7 +126,9 @@ The core methods to suspend and resume devices reside in struct dev_pm_ops pointed to by the ops member of struct dev_pm_domain, or by the pm member of struct bus_type, struct device_type and struct class. They are mostly of interest to the people writing infrastructure for platforms and buses, like PCI -or USB, or device type and device class drivers. +or USB, or device type and device class drivers. They also are relevant to the +writers of device drivers whose subsystems (PM domains, device types, device +classes and bus types) don't provide all power management methods. Bus drivers implement these methods as appropriate for the hardware and the drivers using it; PCI works differently from USB, and so on. Not many people @@ -268,32 +270,35 @@ various phases always run after tasks have been frozen and before they are unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have been disabled (except for those marked with the IRQF_NO_SUSPEND flag). -All phases use PM domain, bus, type, or class callbacks (that is, methods -defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, or dev->class->pm). -These callbacks are regarded by the PM core as mutually exclusive. Moreover, -PM domain callbacks always take precedence over bus, type and class callbacks, -while type callbacks take precedence over bus and class callbacks, and class -callbacks take precedence over bus callbacks. To be precise, the following -rules are used to determine which callback to execute in the given phase: +All phases use PM domain, bus, type, class or driver callbacks (that is, methods +defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, dev->class->pm or +dev->driver->pm). These callbacks are regarded by the PM core as mutually +exclusive. Moreover, PM domain callbacks always take precedence over all of the +other callbacks and, for example, type callbacks take precedence over bus, class +and driver callbacks. To be precise, the following rules are used to determine +which callback to execute in the given phase: - 1. If dev->pm_domain is present, the PM core will attempt to execute the - callback included in dev->pm_domain->ops. If that callback is not - present, no action will be carried out for the given device. + 1. If dev->pm_domain is present, the PM core will choose the callback + included in dev->pm_domain->ops for execution 2. Otherwise, if both dev->type and dev->type->pm are present, the callback - included in dev->type->pm will be executed. + included in dev->type->pm will be chosen for execution. 3. Otherwise, if both dev->class and dev->class->pm are present, the - callback included in dev->class->pm will be executed. + callback included in dev->class->pm will be chosen for execution. 4. Otherwise, if both dev->bus and dev->bus->pm are present, the callback - included in dev->bus->pm will be executed. + included in dev->bus->pm will be chosen for execution. This allows PM domains and device types to override callbacks provided by bus types or device classes if necessary. -These callbacks may in turn invoke device- or driver-specific methods stored in -dev->driver->pm, but they don't have to. +The PM domain, type, class and bus callbacks may in turn invoke device- or +driver-specific methods stored in dev->driver->pm, but they don't have to do +that. + +If the subsystem callback chosen for execution is not present, the PM core will +execute the corresponding method from dev->driver->pm instead if there is one. Entering System Suspend diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index c2ae8bf..4abe83e 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -57,6 +57,10 @@ the following: 4. Bus type of the device, if both dev->bus and dev->bus->pm are present. +If the subsystem chosen by applying the above rules doesn't provide the relevant +callback, the PM core will invoke the corresponding driver callback stored in +dev->driver->pm directly (if present). + The PM core always checks which callback to use in the order given above, so the priority order of callbacks from high to low is: PM domain, device type, class and bus type. Moreover, the high-priority one will always take precedence over @@ -64,86 +68,88 @@ a low-priority one. The PM domain, bus type, device type and class callbacks are referred to as subsystem-level callbacks in what follows. By default, the callbacks are always invoked in process context with interrupts -enabled. However, subsystems can use the pm_runtime_irq_safe() helper function -to tell the PM core that their ->runtime_suspend(), ->runtime_resume() and -->runtime_idle() callbacks may be invoked in atomic context with interrupts -disabled for a given device. This implies that the callback routines in -question must not block or sleep, but it also means that the synchronous helper -functions listed at the end of Section 4 may be used for that device within an -interrupt handler or generally in an atomic context. - -The subsystem-level suspend callback is _entirely_ _responsible_ for handling -the suspend of the device as appropriate, which may, but need not include -executing the device driver's own ->runtime_suspend() callback (from the +enabled. However, the pm_runtime_irq_safe() helper function can be used to tell +the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume() +and ->runtime_idle() callbacks for the given device in atomic context with +interrupts disabled. This implies that the callback routines in question must +not block or sleep, but it also means that the synchronous helper functions +listed at the end of Section 4 may be used for that device within an interrupt +handler or generally in an atomic context. + +The subsystem-level suspend callback, if present, is _entirely_ _responsible_ +for handling the suspend of the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_suspend() callback (from the PM core's point of view it is not necessary to implement a ->runtime_suspend() callback in a device driver as long as the subsystem-level suspend callback knows what to do to handle the device). - * Once the subsystem-level suspend callback has completed successfully - for given device, the PM core regards the device as suspended, which need - not mean that the device has been put into a low power state. It is - supposed to mean, however, that the device will not process data and will - not communicate with the CPU(s) and RAM until the subsystem-level resume - callback is executed for it. The runtime PM status of a device after - successful execution of the subsystem-level suspend callback is 'suspended'. - - * If the subsystem-level suspend callback returns -EBUSY or -EAGAIN, - the device's runtime PM status is 'active', which means that the device - _must_ be fully operational afterwards. - - * If the subsystem-level suspend callback returns an error code different - from -EBUSY or -EAGAIN, the PM core regards this as a fatal error and will - refuse to run the helper functions described in Section 4 for the device, - until the status of it is directly set either to 'active', or to 'suspended' - (the PM core provides special helper functions for this purpose). - -In particular, if the driver requires remote wake-up capability (i.e. hardware + * Once the subsystem-level suspend callback (or the driver suspend callback, + if invoked directly) has completed successfully for the given device, the PM + core regards the device as suspended, which need not mean that it has been + put into a low power state. It is supposed to mean, however, that the + device will not process data and will not communicate with the CPU(s) and + RAM until the appropriate resume callback is executed for it. The runtime + PM status of a device after successful execution of the suspend callback is + 'suspended'. + + * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM + status remains 'active', which means that the device _must_ be fully + operational afterwards. + + * If the suspend callback returns an error code different from -EBUSY and + -EAGAIN, the PM core regards this as a fatal error and will refuse to run + the helper functions described in Section 4 for the device until its status + is directly set to either'active', or 'suspended' (the PM core provides + special helper functions for this purpose). + +In particular, if the driver requires remote wakeup capability (i.e. hardware mechanism allowing the device to request a change of its power state, such as PCI PME) for proper functioning and device_run_wake() returns 'false' for the device, then ->runtime_suspend() should return -EBUSY. On the other hand, if -device_run_wake() returns 'true' for the device and the device is put into a low -power state during the execution of the subsystem-level suspend callback, it is -expected that remote wake-up will be enabled for the device. Generally, remote -wake-up should be enabled for all input devices put into a low power state at -run time. - -The subsystem-level resume callback is _entirely_ _responsible_ for handling the -resume of the device as appropriate, which may, but need not include executing -the device driver's own ->runtime_resume() callback (from the PM core's point of -view it is not necessary to implement a ->runtime_resume() callback in a device -driver as long as the subsystem-level resume callback knows what to do to handle -the device). - - * Once the subsystem-level resume callback has completed successfully, the PM - core regards the device as fully operational, which means that the device - _must_ be able to complete I/O operations as needed. The runtime PM status - of the device is then 'active'. - - * If the subsystem-level resume callback returns an error code, the PM core - regards this as a fatal error and will refuse to run the helper functions - described in Section 4 for the device, until its status is directly set - either to 'active' or to 'suspended' (the PM core provides special helper - functions for this purpose). - -The subsystem-level idle callback is executed by the PM core whenever the device -appears to be idle, which is indicated to the PM core by two counters, the -device's usage counter and the counter of 'active' children of the device. +device_run_wake() returns 'true' for the device and the device is put into a +low-power state during the execution of the suspend callback, it is expected +that remote wakeup will be enabled for the device. Generally, remote wakeup +should be enabled for all input devices put into low-power states at run time. + +The subsystem-level resume callback, if present, is _entirely_ _responsible_ for +handling the resume of the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_resume() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_resume() +callback in a device driver as long as the subsystem-level resume callback knows +what to do to handle the device). + + * Once the subsystem-level resume callback (or the driver resume callback, if + invoked directly) has completed successfully, the PM core regards the device + as fully operational, which means that the device _must_ be able to complete + I/O operations as needed. The runtime PM status of the device is then + 'active'. + + * If the resume callback returns an error code, the PM core regards this as a + fatal error and will refuse to run the helper functions described in Section + 4 for the device, until its status is directly set to either 'active', or + 'suspended' (by means of special helper functions provided by the PM core + for this purpose). + +The idle callback (a subsystem-level one, if present, or the driver one) is +executed by the PM core whenever the device appears to be idle, which is +indicated to the PM core by two counters, the device's usage counter and the +counter of 'active' children of the device. * If any of these counters is decreased using a helper function provided by the PM core and it turns out to be equal to zero, the other counter is checked. If that counter also is equal to zero, the PM core executes the - subsystem-level idle callback with the device as an argument. + idle callback with the device as its argument. -The action performed by a subsystem-level idle callback is totally dependent on -the subsystem in question, but the expected and recommended action is to check +The action performed by the idle callback is totally dependent on the subsystem +(or driver) in question, but the expected and recommended action is to check if the device can be suspended (i.e. if all of the conditions necessary for suspending the device are satisfied) and to queue up a suspend request for the device in that case. The value returned by this callback is ignored by the PM core. The helper functions provided by the PM core, described in Section 4, guarantee -that the following constraints are met with respect to the bus type's runtime -PM callbacks: +that the following constraints are met with respect to runtime PM callbacks for +one device: (1) The callbacks are mutually exclusive (e.g. it is forbidden to execute ->runtime_suspend() in parallel with ->runtime_resume() or with another diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index b5cef7e..e2cc3d2 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -383,10 +383,15 @@ static int device_resume_noirq(struct device *dev, pm_message_t state) info = "EARLY class "; callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { - info = "EARLY "; + info = "EARLY bus "; callback = pm_noirq_op(dev->bus->pm, state); } + if (!callback && dev->driver && dev->driver->pm) { + info = "EARLY driver "; + callback = pm_noirq_op(dev->driver->pm, state); + } + error = dpm_run_callback(callback, dev, state, info); TRACE_RESUME(error); @@ -464,20 +469,20 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) if (dev->pm_domain) { info = "power domain "; callback = pm_op(&dev->pm_domain->ops, state); - goto End; + goto Driver; } if (dev->type && dev->type->pm) { info = "type "; callback = pm_op(dev->type->pm, state); - goto End; + goto Driver; } if (dev->class) { if (dev->class->pm) { info = "class "; callback = pm_op(dev->class->pm, state); - goto End; + goto Driver; } else if (dev->class->resume) { info = "legacy class "; callback = dev->class->resume; @@ -487,14 +492,21 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) if (dev->bus) { if (dev->bus->pm) { - info = ""; + info = "bus "; callback = pm_op(dev->bus->pm, state); } else if (dev->bus->resume) { - info = "legacy "; + info = "legacy bus "; callback = dev->bus->resume; + goto End; } } + Driver: + if (!callback && dev->driver && dev->driver->pm) { + info = "driver "; + callback = pm_op(dev->driver->pm, state); + } + End: error = dpm_run_callback(callback, dev, state, info); dev->power.is_suspended = false; @@ -588,24 +600,33 @@ void dpm_resume(pm_message_t state) */ static void device_complete(struct device *dev, pm_message_t state) { + void (*callback)(struct device *) = NULL; + char *info = NULL; + device_lock(dev); if (dev->pm_domain) { - pm_dev_dbg(dev, state, "completing power domain "); - if (dev->pm_domain->ops.complete) - dev->pm_domain->ops.complete(dev); + info = "completing power domain "; + callback = dev->pm_domain->ops.complete; } else if (dev->type && dev->type->pm) { - pm_dev_dbg(dev, state, "completing type "); - if (dev->type->pm->complete) - dev->type->pm->complete(dev); + info = "completing type "; + callback = dev->type->pm->complete; } else if (dev->class && dev->class->pm) { - pm_dev_dbg(dev, state, "completing class "); - if (dev->class->pm->complete) - dev->class->pm->complete(dev); + info = "completing class "; + callback = dev->class->pm->complete; } else if (dev->bus && dev->bus->pm) { - pm_dev_dbg(dev, state, "completing "); - if (dev->bus->pm->complete) - dev->bus->pm->complete(dev); + info = "completing bus "; + callback = dev->bus->pm->complete; + } + + if (!callback && dev->driver && dev->driver->pm) { + info = "completing driver "; + callback = dev->driver->pm->complete; + } + + if (callback) { + pm_dev_dbg(dev, state, info); + callback(dev); } device_unlock(dev); @@ -704,10 +725,15 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state) info = "LATE class "; callback = pm_noirq_op(dev->class->pm, state); } else if (dev->bus && dev->bus->pm) { - info = "LATE "; + info = "LATE bus "; callback = pm_noirq_op(dev->bus->pm, state); } + if (!callback && dev->driver && dev->driver->pm) { + info = "LATE driver "; + callback = pm_noirq_op(dev->driver->pm, state); + } + return dpm_run_callback(callback, dev, state, info); } @@ -832,16 +858,21 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->bus) { if (dev->bus->pm) { - info = ""; + info = "bus "; callback = pm_op(dev->bus->pm, state); } else if (dev->bus->suspend) { - pm_dev_dbg(dev, state, "legacy "); + pm_dev_dbg(dev, state, "legacy bus "); error = legacy_suspend(dev, state, dev->bus->suspend); goto End; } } Run: + if (!callback && dev->driver && dev->driver->pm) { + info = "driver "; + callback = pm_op(dev->driver->pm, state); + } + error = dpm_run_callback(callback, dev, state, info); End: @@ -949,6 +980,8 @@ int dpm_suspend(pm_message_t state) */ static int device_prepare(struct device *dev, pm_message_t state) { + int (*callback)(struct device *) = NULL; + char *info = NULL; int error = 0; device_lock(dev); @@ -956,25 +989,27 @@ static int device_prepare(struct device *dev, pm_message_t state) dev->power.wakeup_path = device_may_wakeup(dev); if (dev->pm_domain) { - pm_dev_dbg(dev, state, "preparing power domain "); - if (dev->pm_domain->ops.prepare) - error = dev->pm_domain->ops.prepare(dev); - suspend_report_result(dev->pm_domain->ops.prepare, error); + info = "preparing power domain "; + callback = dev->pm_domain->ops.prepare; } else if (dev->type && dev->type->pm) { - pm_dev_dbg(dev, state, "preparing type "); - if (dev->type->pm->prepare) - error = dev->type->pm->prepare(dev); - suspend_report_result(dev->type->pm->prepare, error); + info = "preparing type "; + callback = dev->type->pm->prepare; } else if (dev->class && dev->class->pm) { - pm_dev_dbg(dev, state, "preparing class "); - if (dev->class->pm->prepare) - error = dev->class->pm->prepare(dev); - suspend_report_result(dev->class->pm->prepare, error); + info = "preparing class "; + callback = dev->class->pm->prepare; } else if (dev->bus && dev->bus->pm) { - pm_dev_dbg(dev, state, "preparing "); - if (dev->bus->pm->prepare) - error = dev->bus->pm->prepare(dev); - suspend_report_result(dev->bus->pm->prepare, error); + info = "preparing bus "; + callback = dev->bus->pm->prepare; + } + + if (!callback && dev->driver && dev->driver->pm) { + info = "preparing driver "; + callback = dev->driver->pm->prepare; + } + + if (callback) { + error = callback(dev); + suspend_report_result(callback, error); } device_unlock(dev); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 8c78443..c56efd7 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -250,6 +250,9 @@ static int rpm_idle(struct device *dev, int rpmflags) else callback = NULL; + if (!callback && dev->driver && dev->driver->pm) + callback = dev->driver->pm->runtime_idle; + if (callback) __rpm_callback(callback, dev); @@ -413,6 +416,9 @@ static int rpm_suspend(struct device *dev, int rpmflags) else callback = NULL; + if (!callback && dev->driver && dev->driver->pm) + callback = dev->driver->pm->runtime_suspend; + retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_ACTIVE); @@ -633,6 +639,9 @@ static int rpm_resume(struct device *dev, int rpmflags) else callback = NULL; + if (!callback && dev->driver && dev->driver->pm) + callback = dev->driver->pm->runtime_resume; + retval = rpm_callback(callback, dev); if (retval) { __update_runtime_status(dev, RPM_SUSPENDED); -- cgit v0.10.2 From 9b39e73d0c2b265a7f8748b0e9a9f09be84079a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:24 +0100 Subject: PM / Sleep: Remove forward-only callbacks from platform bus type The forward-only PM callbacks provided by the platform bus type are not necessary any more, because the PM core executes driver callbacks when the corresponding subsystem callbacks are not present, so drop them. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 7a24895..7d912d5 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -700,25 +700,6 @@ static int platform_legacy_resume(struct device *dev) return ret; } -int platform_pm_prepare(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (drv && drv->pm && drv->pm->prepare) - ret = drv->pm->prepare(dev); - - return ret; -} - -void platform_pm_complete(struct device *dev) -{ - struct device_driver *drv = dev->driver; - - if (drv && drv->pm && drv->pm->complete) - drv->pm->complete(dev); -} - #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND @@ -741,22 +722,6 @@ int platform_pm_suspend(struct device *dev) return ret; } -int platform_pm_suspend_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend_noirq) - ret = drv->pm->suspend_noirq(dev); - } - - return ret; -} - int platform_pm_resume(struct device *dev) { struct device_driver *drv = dev->driver; @@ -775,22 +740,6 @@ int platform_pm_resume(struct device *dev) return ret; } -int platform_pm_resume_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume_noirq) - ret = drv->pm->resume_noirq(dev); - } - - return ret; -} - #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATE_CALLBACKS @@ -813,22 +762,6 @@ int platform_pm_freeze(struct device *dev) return ret; } -int platform_pm_freeze_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze_noirq) - ret = drv->pm->freeze_noirq(dev); - } - - return ret; -} - int platform_pm_thaw(struct device *dev) { struct device_driver *drv = dev->driver; @@ -847,22 +780,6 @@ int platform_pm_thaw(struct device *dev) return ret; } -int platform_pm_thaw_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw_noirq) - ret = drv->pm->thaw_noirq(dev); - } - - return ret; -} - int platform_pm_poweroff(struct device *dev) { struct device_driver *drv = dev->driver; @@ -881,22 +798,6 @@ int platform_pm_poweroff(struct device *dev) return ret; } -int platform_pm_poweroff_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff_noirq) - ret = drv->pm->poweroff_noirq(dev); - } - - return ret; -} - int platform_pm_restore(struct device *dev) { struct device_driver *drv = dev->driver; @@ -915,22 +816,6 @@ int platform_pm_restore(struct device *dev) return ret; } -int platform_pm_restore_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore_noirq) - ret = drv->pm->restore_noirq(dev); - } - - return ret; -} - #endif /* CONFIG_HIBERNATE_CALLBACKS */ static const struct dev_pm_ops platform_dev_pm_ops = { diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 2a23f7d..b5267c9 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -264,62 +264,34 @@ static inline char *early_platform_driver_setup_func(void) \ } #endif /* MODULE */ -#ifdef CONFIG_PM_SLEEP -extern int platform_pm_prepare(struct device *dev); -extern void platform_pm_complete(struct device *dev); -#else -#define platform_pm_prepare NULL -#define platform_pm_complete NULL -#endif - #ifdef CONFIG_SUSPEND extern int platform_pm_suspend(struct device *dev); -extern int platform_pm_suspend_noirq(struct device *dev); extern int platform_pm_resume(struct device *dev); -extern int platform_pm_resume_noirq(struct device *dev); #else #define platform_pm_suspend NULL #define platform_pm_resume NULL -#define platform_pm_suspend_noirq NULL -#define platform_pm_resume_noirq NULL #endif #ifdef CONFIG_HIBERNATE_CALLBACKS extern int platform_pm_freeze(struct device *dev); -extern int platform_pm_freeze_noirq(struct device *dev); extern int platform_pm_thaw(struct device *dev); -extern int platform_pm_thaw_noirq(struct device *dev); extern int platform_pm_poweroff(struct device *dev); -extern int platform_pm_poweroff_noirq(struct device *dev); extern int platform_pm_restore(struct device *dev); -extern int platform_pm_restore_noirq(struct device *dev); #else #define platform_pm_freeze NULL #define platform_pm_thaw NULL #define platform_pm_poweroff NULL #define platform_pm_restore NULL -#define platform_pm_freeze_noirq NULL -#define platform_pm_thaw_noirq NULL -#define platform_pm_poweroff_noirq NULL -#define platform_pm_restore_noirq NULL #endif #ifdef CONFIG_PM_SLEEP #define USE_PLATFORM_PM_SLEEP_OPS \ - .prepare = platform_pm_prepare, \ - .complete = platform_pm_complete, \ .suspend = platform_pm_suspend, \ .resume = platform_pm_resume, \ .freeze = platform_pm_freeze, \ .thaw = platform_pm_thaw, \ .poweroff = platform_pm_poweroff, \ - .restore = platform_pm_restore, \ - .suspend_noirq = platform_pm_suspend_noirq, \ - .resume_noirq = platform_pm_resume_noirq, \ - .freeze_noirq = platform_pm_freeze_noirq, \ - .thaw_noirq = platform_pm_thaw_noirq, \ - .poweroff_noirq = platform_pm_poweroff_noirq, \ - .restore_noirq = platform_pm_restore_noirq, + .restore = platform_pm_restore, #else #define USE_PLATFORM_PM_SLEEP_OPS #endif -- cgit v0.10.2 From 8114ab763b2d297c8af49bf380a093d76e929692 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:31 +0100 Subject: PM / Sleep: Remove forward-only callbacks from AMBA bus type The forward-only PM callbacks provided by the AMBA bus type are not necessary any more, because the PM core executes driver callbacks when the corresponding subsystem callbacks are not present, so drop them. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index bd230e8..0304b3f 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -109,31 +109,7 @@ static int amba_legacy_resume(struct device *dev) return ret; } -static int amba_pm_prepare(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (drv && drv->pm && drv->pm->prepare) - ret = drv->pm->prepare(dev); - - return ret; -} - -static void amba_pm_complete(struct device *dev) -{ - struct device_driver *drv = dev->driver; - - if (drv && drv->pm && drv->pm->complete) - drv->pm->complete(dev); -} - -#else /* !CONFIG_PM_SLEEP */ - -#define amba_pm_prepare NULL -#define amba_pm_complete NULL - -#endif /* !CONFIG_PM_SLEEP */ +#endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND @@ -155,22 +131,6 @@ static int amba_pm_suspend(struct device *dev) return ret; } -static int amba_pm_suspend_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->suspend_noirq) - ret = drv->pm->suspend_noirq(dev); - } - - return ret; -} - static int amba_pm_resume(struct device *dev) { struct device_driver *drv = dev->driver; @@ -189,28 +149,10 @@ static int amba_pm_resume(struct device *dev) return ret; } -static int amba_pm_resume_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->resume_noirq) - ret = drv->pm->resume_noirq(dev); - } - - return ret; -} - #else /* !CONFIG_SUSPEND */ #define amba_pm_suspend NULL #define amba_pm_resume NULL -#define amba_pm_suspend_noirq NULL -#define amba_pm_resume_noirq NULL #endif /* !CONFIG_SUSPEND */ @@ -234,22 +176,6 @@ static int amba_pm_freeze(struct device *dev) return ret; } -static int amba_pm_freeze_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->freeze_noirq) - ret = drv->pm->freeze_noirq(dev); - } - - return ret; -} - static int amba_pm_thaw(struct device *dev) { struct device_driver *drv = dev->driver; @@ -268,22 +194,6 @@ static int amba_pm_thaw(struct device *dev) return ret; } -static int amba_pm_thaw_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->thaw_noirq) - ret = drv->pm->thaw_noirq(dev); - } - - return ret; -} - static int amba_pm_poweroff(struct device *dev) { struct device_driver *drv = dev->driver; @@ -302,22 +212,6 @@ static int amba_pm_poweroff(struct device *dev) return ret; } -static int amba_pm_poweroff_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->poweroff_noirq) - ret = drv->pm->poweroff_noirq(dev); - } - - return ret; -} - static int amba_pm_restore(struct device *dev) { struct device_driver *drv = dev->driver; @@ -336,32 +230,12 @@ static int amba_pm_restore(struct device *dev) return ret; } -static int amba_pm_restore_noirq(struct device *dev) -{ - struct device_driver *drv = dev->driver; - int ret = 0; - - if (!drv) - return 0; - - if (drv->pm) { - if (drv->pm->restore_noirq) - ret = drv->pm->restore_noirq(dev); - } - - return ret; -} - #else /* !CONFIG_HIBERNATE_CALLBACKS */ #define amba_pm_freeze NULL #define amba_pm_thaw NULL #define amba_pm_poweroff NULL #define amba_pm_restore NULL -#define amba_pm_freeze_noirq NULL -#define amba_pm_thaw_noirq NULL -#define amba_pm_poweroff_noirq NULL -#define amba_pm_restore_noirq NULL #endif /* !CONFIG_HIBERNATE_CALLBACKS */ @@ -402,20 +276,12 @@ static int amba_pm_runtime_resume(struct device *dev) #ifdef CONFIG_PM static const struct dev_pm_ops amba_pm = { - .prepare = amba_pm_prepare, - .complete = amba_pm_complete, .suspend = amba_pm_suspend, .resume = amba_pm_resume, .freeze = amba_pm_freeze, .thaw = amba_pm_thaw, .poweroff = amba_pm_poweroff, .restore = amba_pm_restore, - .suspend_noirq = amba_pm_suspend_noirq, - .resume_noirq = amba_pm_resume_noirq, - .freeze_noirq = amba_pm_freeze_noirq, - .thaw_noirq = amba_pm_thaw_noirq, - .poweroff_noirq = amba_pm_poweroff_noirq, - .restore_noirq = amba_pm_restore_noirq, SET_RUNTIME_PM_OPS( amba_pm_runtime_suspend, amba_pm_runtime_resume, -- cgit v0.10.2 From 90363ddf0a1a4dccfbb8d0c10b8f488bc7fa69f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 18 Dec 2011 00:34:42 +0100 Subject: PM: Drop generic_subsys_pm_ops Since the PM core is now going to execute driver callbacks directly if the corresponding subsystem callbacks are not present, forward-only subsystem callbacks (i.e. such that only execute the corresponding driver callbacks) are not necessary any more. Thus it is possible to remove generic_subsys_pm_ops, because the only callback in there that is not forward-only, .runtime_idle, is not really used by the only user of generic_subsys_pm_ops, which is vio_bus_type. However, the generic callback routines themselves cannot be removed from generic_ops.c, because they are used individually by a number of subsystems. Signed-off-by: Rafael J. Wysocki diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index f65af61..8b08629 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1406,7 +1406,6 @@ static struct bus_type vio_bus_type = { .match = vio_bus_match, .probe = vio_bus_probe, .remove = vio_bus_remove, - .pm = GENERIC_SUBSYS_PM_OPS, }; /** diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c index 5a5b154..10bdd79 100644 --- a/drivers/base/power/generic_ops.c +++ b/drivers/base/power/generic_ops.c @@ -276,28 +276,3 @@ void pm_generic_complete(struct device *dev) pm_runtime_idle(dev); } #endif /* CONFIG_PM_SLEEP */ - -struct dev_pm_ops generic_subsys_pm_ops = { -#ifdef CONFIG_PM_SLEEP - .prepare = pm_generic_prepare, - .suspend = pm_generic_suspend, - .suspend_noirq = pm_generic_suspend_noirq, - .resume = pm_generic_resume, - .resume_noirq = pm_generic_resume_noirq, - .freeze = pm_generic_freeze, - .freeze_noirq = pm_generic_freeze_noirq, - .thaw = pm_generic_thaw, - .thaw_noirq = pm_generic_thaw_noirq, - .poweroff = pm_generic_poweroff, - .poweroff_noirq = pm_generic_poweroff_noirq, - .restore = pm_generic_restore, - .restore_noirq = pm_generic_restore_noirq, - .complete = pm_generic_complete, -#endif -#ifdef CONFIG_PM_RUNTIME - .runtime_suspend = pm_generic_runtime_suspend, - .runtime_resume = pm_generic_runtime_resume, - .runtime_idle = pm_generic_runtime_idle, -#endif -}; -EXPORT_SYMBOL_GPL(generic_subsys_pm_ops); diff --git a/include/linux/pm.h b/include/linux/pm.h index 3f3ed83..21e04dd 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -300,19 +300,6 @@ const struct dev_pm_ops name = { \ SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn) \ } -/* - * Use this for subsystems (bus types, device types, device classes) that don't - * need any special suspend/resume handling in addition to invoking the PM - * callbacks provided by device drivers supporting both the system sleep PM and - * runtime PM, make the pm member point to generic_subsys_pm_ops. - */ -#ifdef CONFIG_PM -extern struct dev_pm_ops generic_subsys_pm_ops; -#define GENERIC_SUBSYS_PM_OPS (&generic_subsys_pm_ops) -#else -#define GENERIC_SUBSYS_PM_OPS NULL -#endif - /** * PM_EVENT_ messages * -- cgit v0.10.2 From f7dadb37931a6ffa2aa6b443188299166dc5e638 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 23 Dec 2011 01:23:07 +0100 Subject: PM / shmobile: Add support for the sh7372 A4S power domain / sleep mode The sh7372 contains a power domain named A4S which in turn contains power domains for both I/O Devices and CPU cores. At this point only System wide Suspend-to-RAM is supported, but the the hardware can also support CPUIdle. With more efforts in the future CPUIdle can work with bot A4S and A3SM. Tested on the sh7372 Mackerel board. [rjw: Rebased on top of the current linux-pm tree.] Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki diff --git a/arch/arm/mach-shmobile/include/mach/common.h b/arch/arm/mach-shmobile/include/mach/common.h index 834bd6c..4807623 100644 --- a/arch/arm/mach-shmobile/include/mach/common.h +++ b/arch/arm/mach-shmobile/include/mach/common.h @@ -35,8 +35,8 @@ extern void sh7372_add_standard_devices(void); extern void sh7372_clock_init(void); extern void sh7372_pinmux_init(void); extern void sh7372_pm_init(void); -extern void sh7372_resume_core_standby_a3sm(void); -extern int sh7372_do_idle_a3sm(unsigned long unused); +extern void sh7372_resume_core_standby_sysc(void); +extern int sh7372_do_idle_sysc(unsigned long sleep_mode); extern struct clk sh7372_extal1_clk; extern struct clk sh7372_extal2_clk; diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index 84532f9..d0731d9 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -499,6 +499,7 @@ extern struct sh7372_pm_domain sh7372_d4; extern struct sh7372_pm_domain sh7372_a4r; extern struct sh7372_pm_domain sh7372_a3rv; extern struct sh7372_pm_domain sh7372_a3ri; +extern struct sh7372_pm_domain sh7372_a4s; extern struct sh7372_pm_domain sh7372_a3sp; extern struct sh7372_pm_domain sh7372_a3sg; @@ -515,5 +516,7 @@ extern void sh7372_pm_add_subdomain(struct sh7372_pm_domain *sh7372_pd, extern void sh7372_intcs_suspend(void); extern void sh7372_intcs_resume(void); +extern void sh7372_intca_suspend(void); +extern void sh7372_intca_resume(void); #endif /* __ASM_SH7372_H__ */ diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c index 2d8856d..d087b31 100644 --- a/arch/arm/mach-shmobile/intc-sh7372.c +++ b/arch/arm/mach-shmobile/intc-sh7372.c @@ -611,3 +611,52 @@ void sh7372_intcs_resume(void) for (k = 0x80; k <= 0x9c; k += 4) __raw_writeb(ffd5[k], intcs_ffd5 + k); } + +static unsigned short e694[0x200]; +static unsigned short e695[0x200]; + +void sh7372_intca_suspend(void) +{ + int k; + + for (k = 0x00; k <= 0x38; k += 4) + e694[k] = __raw_readw(0xe6940000 + k); + + for (k = 0x80; k <= 0xb4; k += 4) + e694[k] = __raw_readb(0xe6940000 + k); + + for (k = 0x180; k <= 0x1b4; k += 4) + e694[k] = __raw_readb(0xe6940000 + k); + + for (k = 0x00; k <= 0x50; k += 4) + e695[k] = __raw_readw(0xe6950000 + k); + + for (k = 0x80; k <= 0xa8; k += 4) + e695[k] = __raw_readb(0xe6950000 + k); + + for (k = 0x180; k <= 0x1a8; k += 4) + e695[k] = __raw_readb(0xe6950000 + k); +} + +void sh7372_intca_resume(void) +{ + int k; + + for (k = 0x00; k <= 0x38; k += 4) + __raw_writew(e694[k], 0xe6940000 + k); + + for (k = 0x80; k <= 0xb4; k += 4) + __raw_writeb(e694[k], 0xe6940000 + k); + + for (k = 0x180; k <= 0x1b4; k += 4) + __raw_writeb(e694[k], 0xe6940000 + k); + + for (k = 0x00; k <= 0x50; k += 4) + __raw_writew(e695[k], 0xe6950000 + k); + + for (k = 0x80; k <= 0xa8; k += 4) + __raw_writeb(e695[k], 0xe6950000 + k); + + for (k = 0x180; k <= 0x1a8; k += 4) + __raw_writeb(e695[k], 0xe6950000 + k); +} diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index f742c61..1e659d7 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -256,6 +256,14 @@ struct sh7372_pm_domain sh7372_a3ri = { .bit_shift = 8, }; +struct sh7372_pm_domain sh7372_a4s = { + .genpd.name = "A4S", + .bit_shift = 10, + .gov = &pm_domain_always_on_gov, + .no_debug = true, + .stay_on = true, +}; + struct sh7372_pm_domain sh7372_a3sp = { .genpd.name = "A3SP", .bit_shift = 11, @@ -289,11 +297,16 @@ static int sh7372_do_idle_core_standby(unsigned long unused) return 0; } -static void sh7372_enter_core_standby(void) +static void sh7372_set_reset_vector(unsigned long address) { /* set reset vector, translate 4k */ - __raw_writel(__pa(sh7372_resume_core_standby_a3sm), SBAR); + __raw_writel(address, SBAR); __raw_writel(0, APARMBAREA); +} + +static void sh7372_enter_core_standby(void) +{ + sh7372_set_reset_vector(__pa(sh7372_resume_core_standby_sysc)); /* enter sleep mode with SYSTBCR to 0x10 */ __raw_writel(0x10, SYSTBCR); @@ -306,27 +319,22 @@ static void sh7372_enter_core_standby(void) #endif #ifdef CONFIG_SUSPEND -static void sh7372_enter_a3sm_common(int pllc0_on) +static void sh7372_enter_sysc(int pllc0_on, unsigned long sleep_mode) { - /* set reset vector, translate 4k */ - __raw_writel(__pa(sh7372_resume_core_standby_a3sm), SBAR); - __raw_writel(0, APARMBAREA); - if (pllc0_on) __raw_writel(0, PLLC01STPCR); else __raw_writel(1 << 28, PLLC01STPCR); - __raw_writel(0, PDNSEL); /* power-down A3SM only, not A4S */ __raw_readl(WUPSFAC); /* read wakeup int. factor before sleep */ - cpu_suspend(0, sh7372_do_idle_a3sm); + cpu_suspend(sleep_mode, sh7372_do_idle_sysc); __raw_readl(WUPSFAC); /* read wakeup int. factor after wakeup */ /* disable reset vector translation */ __raw_writel(0, SBAR); } -static int sh7372_a3sm_valid(unsigned long *mskp, unsigned long *msk2p) +static int sh7372_sysc_valid(unsigned long *mskp, unsigned long *msk2p) { unsigned long mstpsr0, mstpsr1, mstpsr2, mstpsr3, mstpsr4; unsigned long msk, msk2; @@ -414,7 +422,7 @@ static void sh7372_icr_to_irqcr(unsigned long icr, u16 *irqcr1p, u16 *irqcr2p) *irqcr2p = irqcr2; } -static void sh7372_setup_a3sm(unsigned long msk, unsigned long msk2) +static void sh7372_setup_sysc(unsigned long msk, unsigned long msk2) { u16 irqcrx_low, irqcrx_high, irqcry_low, irqcry_high; unsigned long tmp; @@ -447,6 +455,22 @@ static void sh7372_setup_a3sm(unsigned long msk, unsigned long msk2) __raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR3); __raw_writel((irqcry_high << 16) | irqcry_low, IRQCR4); } + +static void sh7372_enter_a3sm_common(int pllc0_on) +{ + sh7372_set_reset_vector(__pa(sh7372_resume_core_standby_sysc)); + sh7372_enter_sysc(pllc0_on, 1 << 12); +} + +static void sh7372_enter_a4s_common(int pllc0_on) +{ + sh7372_intca_suspend(); + memcpy((void *)SMFRAM, sh7372_resume_core_standby_sysc, 0x100); + sh7372_set_reset_vector(SMFRAM); + sh7372_enter_sysc(pllc0_on, 1 << 10); + sh7372_intca_resume(); +} + #endif #ifdef CONFIG_CPU_IDLE @@ -480,14 +504,20 @@ static int sh7372_enter_suspend(suspend_state_t suspend_state) unsigned long msk, msk2; /* check active clocks to determine potential wakeup sources */ - if (sh7372_a3sm_valid(&msk, &msk2)) { - + if (sh7372_sysc_valid(&msk, &msk2)) { /* convert INTC mask and sense to SYSC mask and sense */ - sh7372_setup_a3sm(msk, msk2); - - /* enter A3SM sleep with PLLC0 off */ - pr_debug("entering A3SM\n"); - sh7372_enter_a3sm_common(0); + sh7372_setup_sysc(msk, msk2); + + if (!sh7372_a3sp.stay_on && + sh7372_a4s.genpd.status == GPD_STATE_POWER_OFF) { + /* enter A4S sleep with PLLC0 off */ + pr_debug("entering A4S\n"); + sh7372_enter_a4s_common(0); + } else { + /* enter A3SM sleep with PLLC0 off */ + pr_debug("entering A3SM\n"); + sh7372_enter_a3sm_common(0); + } } else { /* default to Core Standby that supports all wakeup sources */ pr_debug("entering Core Standby\n"); diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index 2380389..c197f9d 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -994,12 +994,16 @@ void __init sh7372_add_standard_devices(void) sh7372_init_pm_domain(&sh7372_a4r); sh7372_init_pm_domain(&sh7372_a3rv); sh7372_init_pm_domain(&sh7372_a3ri); - sh7372_init_pm_domain(&sh7372_a3sg); + sh7372_init_pm_domain(&sh7372_a4s); sh7372_init_pm_domain(&sh7372_a3sp); + sh7372_init_pm_domain(&sh7372_a3sg); sh7372_pm_add_subdomain(&sh7372_a4lc, &sh7372_a3rv); sh7372_pm_add_subdomain(&sh7372_a4r, &sh7372_a4lc); + sh7372_pm_add_subdomain(&sh7372_a4s, &sh7372_a3sg); + sh7372_pm_add_subdomain(&sh7372_a4s, &sh7372_a3sp); + platform_add_devices(sh7372_early_devices, ARRAY_SIZE(sh7372_early_devices)); diff --git a/arch/arm/mach-shmobile/sleep-sh7372.S b/arch/arm/mach-shmobile/sleep-sh7372.S index f3ab3c5..1d56467 100644 --- a/arch/arm/mach-shmobile/sleep-sh7372.S +++ b/arch/arm/mach-shmobile/sleep-sh7372.S @@ -37,13 +37,18 @@ #if defined(CONFIG_SUSPEND) || defined(CONFIG_CPU_IDLE) .align 12 .text - .global sh7372_resume_core_standby_a3sm -sh7372_resume_core_standby_a3sm: + .global sh7372_resume_core_standby_sysc +sh7372_resume_core_standby_sysc: ldr pc, 1f 1: .long cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET - .global sh7372_do_idle_a3sm -sh7372_do_idle_a3sm: +#define SPDCR 0xe6180008 + + /* A3SM & A4S power down */ + .global sh7372_do_idle_sysc +sh7372_do_idle_sysc: + mov r8, r0 /* sleep mode passed in r0 */ + /* * Clear the SCTLR.C bit to prevent further data cache * allocation. Clearing SCTLR.C would make all the data accesses @@ -80,13 +85,9 @@ sh7372_do_idle_a3sm: dsb dmb -#define SPDCR 0xe6180008 -#define A3SM (1 << 12) - - /* A3SM power down */ + /* SYSC power down */ ldr r0, =SPDCR - ldr r1, =A3SM - str r1, [r0] + str r8, [r0] 1: b 1b -- cgit v0.10.2 From 0f966d74cf77a9140a025464a287e1d2fee8a1fc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:23:30 +0100 Subject: PM / shmobile: Don't include SH7372's INTCS in syscore suspend/resume Since the SH7372's INTCS in included into syscore suspend/resume, which causes the chip to be accessed when PM domains have been turned off during system suspend, the A4R domain containing the INTCS has to stay on during system sleep, which is suboptimal from the power consumption point of view. For this reason, add a new INTC flag, skip_syscore_suspend, to mark the INTCS for intc_suspend() and intc_resume(), so that they don't touch it. This allows the A4R domain to be turned off during system suspend and the INTCS state is resrored during system resume by the A4R's "power on" code. Suggested-by: Magnus Damm Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm diff --git a/arch/arm/mach-shmobile/intc-sh7372.c b/arch/arm/mach-shmobile/intc-sh7372.c index d087b31..89afcab 100644 --- a/arch/arm/mach-shmobile/intc-sh7372.c +++ b/arch/arm/mach-shmobile/intc-sh7372.c @@ -535,6 +535,7 @@ static struct resource intcs_resources[] __initdata = { static struct intc_desc intcs_desc __initdata = { .name = "sh7372-intcs", .force_enable = ENABLED_INTCS, + .skip_syscore_suspend = true, .resource = intcs_resources, .num_resources = ARRAY_SIZE(intcs_resources), .hw = INTC_HW_DESC(intcs_vectors, intcs_groups, intcs_mask_registers, diff --git a/drivers/sh/intc/core.c b/drivers/sh/intc/core.c index 8b7a141..be5a025 100644 --- a/drivers/sh/intc/core.c +++ b/drivers/sh/intc/core.c @@ -354,6 +354,8 @@ int __init register_intc_controller(struct intc_desc *desc) if (desc->force_enable) intc_enable_disable_enum(desc, d, desc->force_enable, 1); + d->skip_suspend = desc->skip_syscore_suspend; + nr_intc_controllers++; return 0; @@ -386,6 +388,9 @@ static int intc_suspend(void) list_for_each_entry(d, &intc_list, list) { int irq; + if (d->skip_suspend) + continue; + /* enable wakeup irqs belonging to this intc controller */ for_each_active_irq(irq) { struct irq_data *data; @@ -409,6 +414,9 @@ static void intc_resume(void) list_for_each_entry(d, &intc_list, list) { int irq; + if (d->skip_suspend) + continue; + for_each_active_irq(irq) { struct irq_data *data; struct irq_chip *chip; diff --git a/drivers/sh/intc/internals.h b/drivers/sh/intc/internals.h index 5b93485..b3fe1cf 100644 --- a/drivers/sh/intc/internals.h +++ b/drivers/sh/intc/internals.h @@ -67,6 +67,7 @@ struct intc_desc_int { struct intc_window *window; unsigned int nr_windows; struct irq_chip chip; + bool skip_suspend; }; diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index 5812fef..b160645 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -95,6 +95,7 @@ struct intc_desc { unsigned int num_resources; intc_enum force_enable; intc_enum force_disable; + bool skip_syscore_suspend; struct intc_hw_desc hw; }; -- cgit v0.10.2 From 767c0f3aed74be56f268709f5347e6c86d52b408 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:23:39 +0100 Subject: PM / shmobile: Remove the stay_on flag from SH7372's PM domains SH7372 uses two independent mechanisms for ensuring that power domains will never be turned off: the stay_on flag and the "always on" domain governor. Moreover, the "always on" governor is only taken into accout by runtime PM code paths, while the stay_on flag affects all attempts to turn the given domain off. Thus setting the stay_on flag causes the "always on" governor to be unnecessary, which is quite confusing. However, the stay_on flag is currently only set for two domains: A3SP and A4S. Moreover, it only is set for the A3SP domain if console_suspend_enabled is set, so stay_on won't be necessary for that domain any more if console_suspend_enabled is checked directly in its .suspend() routine. [This requires domain .suspend() to return a result, but that is a minor modification.] Analogously, stay_on won't be necessary for the A4S domain if it's .suspend() routine always returns an error code. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index d0731d9..8254ab8 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -480,11 +480,10 @@ struct platform_device; struct sh7372_pm_domain { struct generic_pm_domain genpd; struct dev_power_governor *gov; - void (*suspend)(void); + int (*suspend)(void); void (*resume)(void); unsigned int bit_shift; bool no_debug; - bool stay_on; }; static inline struct sh7372_pm_domain *to_sh7372_pd(struct generic_pm_domain *d) diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 1e659d7..7fda230 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -82,11 +82,12 @@ static int pd_power_down(struct generic_pm_domain *genpd) struct sh7372_pm_domain *sh7372_pd = to_sh7372_pd(genpd); unsigned int mask = 1 << sh7372_pd->bit_shift; - if (sh7372_pd->suspend) - sh7372_pd->suspend(); + if (sh7372_pd->suspend) { + int ret = sh7372_pd->suspend(); - if (sh7372_pd->stay_on) - return 0; + if (ret) + return ret; + } if (__raw_readl(PSTR) & mask) { unsigned int retry_count; @@ -113,9 +114,6 @@ static int __pd_power_up(struct sh7372_pm_domain *sh7372_pd, bool do_resume) unsigned int retry_count; int ret = 0; - if (sh7372_pd->stay_on) - goto out; - if (__raw_readl(PSTR) & mask) goto out; @@ -148,10 +146,11 @@ static int pd_power_up(struct generic_pm_domain *genpd) return __pd_power_up(to_sh7372_pd(genpd), true); } -static void sh7372_a4r_suspend(void) +static int sh7372_a4r_suspend(void) { sh7372_intcs_suspend(); __raw_writel(0x300fffff, WUPRMSK); /* avoid wakeup */ + return 0; } static bool pd_active_wakeup(struct device *dev) @@ -243,7 +242,6 @@ struct sh7372_pm_domain sh7372_a4r = { .gov = &pm_domain_always_on_gov, .suspend = sh7372_a4r_suspend, .resume = sh7372_intcs_resume, - .stay_on = true, }; struct sh7372_pm_domain sh7372_a3rv = { @@ -256,29 +254,40 @@ struct sh7372_pm_domain sh7372_a3ri = { .bit_shift = 8, }; +static int sh7372_a4s_suspend(void) +{ + /* + * The A4S domain contains the CPU core and therefore it should + * only be turned off if the CPU is in use. + */ + return -EBUSY; +} + struct sh7372_pm_domain sh7372_a4s = { .genpd.name = "A4S", .bit_shift = 10, .gov = &pm_domain_always_on_gov, .no_debug = true, - .stay_on = true, + .suspend = sh7372_a4s_suspend, }; +static int sh7372_a3sp_suspend(void) +{ + /* + * Serial consoles make use of SCIF hardware located in A3SP, + * keep such power domain on if "no_console_suspend" is set. + */ + return console_suspend_enabled ? -EBUSY : 0; +} + struct sh7372_pm_domain sh7372_a3sp = { .genpd.name = "A3SP", .bit_shift = 11, .gov = &pm_domain_always_on_gov, .no_debug = true, + .suspend = sh7372_a3sp_suspend, }; -static void sh7372_a3sp_init(void) -{ - /* serial consoles make use of SCIF hardware located in A3SP, - * keep such power domain on if "no_console_suspend" is set. - */ - sh7372_a3sp.stay_on = !console_suspend_enabled; -} - struct sh7372_pm_domain sh7372_a3sg = { .genpd.name = "A3SG", .bit_shift = 13, @@ -508,7 +517,7 @@ static int sh7372_enter_suspend(suspend_state_t suspend_state) /* convert INTC mask and sense to SYSC mask and sense */ sh7372_setup_sysc(msk, msk2); - if (!sh7372_a3sp.stay_on && + if (!console_suspend_enabled && sh7372_a4s.genpd.status == GPD_STATE_POWER_OFF) { /* enter A4S sleep with PLLC0 off */ pr_debug("entering A4S\n"); @@ -544,8 +553,6 @@ void __init sh7372_pm_init(void) /* do not convert A3SM, A3SP, A3SG, A4R power down into A4S */ __raw_writel(0, PDNSEL); - sh7372_a3sp_init(); - sh7372_suspend_init(); sh7372_cpuidle_init(); } -- cgit v0.10.2 From 40a5f8be2f482783de0f1f0fe856660e489734a8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:23:52 +0100 Subject: PM / QoS: Introduce dev_pm_qos_add_ancestor_request() Some devices, like the I2C controller on SH7372, are not necessary for providing power to their children or forwarding wakeup signals (and generally interrupts) from them. They are only needed by their children when there's some data to transfer, so they may be suspended for the majority of time and resumed on demand, when the children have data to send or receive. For this purpose, however, their power.ignore_children flags have to be set, or the PM core wouldn't allow them to be suspended while their children were active. Unfortunately, in some situations it may take too much time to resume such devices so that they can assist their children in transferring data. For example, if such a device belongs to a PM domain which goes to the "power off" state when that device is suspended, it may take too much time to restore power to the domain in response to the request from one of the device's children. In that case, if the parent's resume time is critical, the domain should stay in the "power on" state, although it still may be desirable to power manage the parent itself (e.g. by manipulating its clock). In general, device PM QoS may be used to address this problem. Namely, if the device's children added PM QoS latency constraints for it, they would be able to prevent it from being put into an overly deep low-power state. However, in some cases the devices needing to be serviced are not the immediate children of a "children-ignoring" device, but its grandchildren or even less direct descendants. In those cases, the entity wanting to add a PM QoS request for a given device's ancestor that ignores its children will have to find it in the first place, so introduce a new helper function that may be used to achieve that. This function, dev_pm_qos_add_ancestor_request(), will search for the first ancestor of the given device whose power.ignore_children flag is set and will add a device PM QoS latency request for that ancestor on behalf of the caller. The request added this way may be removed with the help of dev_pm_qos_remove_request() in the future, like any other device PM QoS latency request. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 86de6c5..edf7687 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -412,3 +412,28 @@ int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier) return blocking_notifier_chain_unregister(&dev_pm_notifiers, notifier); } EXPORT_SYMBOL_GPL(dev_pm_qos_remove_global_notifier); + +/** + * dev_pm_qos_add_ancestor_request - Add PM QoS request for device's ancestor. + * @dev: Device whose ancestor to add the request for. + * @req: Pointer to the preallocated handle. + * @value: Constraint latency value. + */ +int dev_pm_qos_add_ancestor_request(struct device *dev, + struct dev_pm_qos_request *req, s32 value) +{ + struct device *ancestor = dev->parent; + int error = -ENODEV; + + while (ancestor && !ancestor->power.ignore_children) + ancestor = ancestor->parent; + + if (ancestor) + error = dev_pm_qos_add_request(ancestor, req, value); + + if (error) + req->dev = NULL; + + return error; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 83b0ea3..fe247b3 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -91,6 +91,8 @@ int dev_pm_qos_add_global_notifier(struct notifier_block *notifier); int dev_pm_qos_remove_global_notifier(struct notifier_block *notifier); void dev_pm_qos_constraints_init(struct device *dev); void dev_pm_qos_constraints_destroy(struct device *dev); +int dev_pm_qos_add_ancestor_request(struct device *dev, + struct dev_pm_qos_request *req, s32 value); #else static inline int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, @@ -150,6 +152,9 @@ static inline void dev_pm_qos_constraints_destroy(struct device *dev) { dev->power.power_state = PMSG_INVALID; } +static inline int dev_pm_qos_add_ancestor_request(struct device *dev, + struct dev_pm_qos_request *req, s32 value) + { return 0; } #endif #endif -- cgit v0.10.2 From 9ee27ffbe303ce18e7336115f1d443e9911eba53 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:24:25 +0100 Subject: PM / input / touchscreen: Make st1232 use device PM QoS constraints Make the st1232 driver use dev_pm_qos_add_ancestor_request() to add a device PM QoS latency constraint for the controller it depends on, so that the controller won't go into an overly deep low-power state when the touchscreen has to be particularly responsive (e.g. when the user moves his or her finger on it). This change is based on a prototype patch from Guennadi Liakhovetski. Signed-off-by: Rafael J. Wysocki diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index 4ab3713..8825fe3 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,7 @@ struct st1232_ts_data { struct i2c_client *client; struct input_dev *input_dev; struct st1232_ts_finger finger[MAX_FINGERS]; + struct dev_pm_qos_request low_latency_req; }; static int st1232_ts_read_data(struct st1232_ts_data *ts) @@ -118,8 +120,17 @@ static irqreturn_t st1232_ts_irq_handler(int irq, void *dev_id) } /* SYN_MT_REPORT only if no contact */ - if (!count) + if (!count) { input_mt_sync(input_dev); + if (ts->low_latency_req.dev) { + dev_pm_qos_remove_request(&ts->low_latency_req); + ts->low_latency_req.dev = NULL; + } + } else if (!ts->low_latency_req.dev) { + /* First contact, request 100 us latency. */ + dev_pm_qos_add_ancestor_request(&ts->client->dev, + &ts->low_latency_req, 100); + } /* SYN_REPORT */ input_sync(input_dev); -- cgit v0.10.2 From a8cf27bee7adc40d91956cf1b9e44d7001f93aba Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Dec 2011 01:24:34 +0100 Subject: PM / shmobile: Allow the A4R domain to be turned off at run time After adding PM QoS constraints for the I2C controller in the A4R domain, that domain can be allowed to be turned off and on by runtime PM, so remove the "always on" governor from it. However, the A4R domain has to be "on" when suspend_device_irqs() and resume_device_irqs() are executed during system suspend and resume, respectively, so that those functions don't crash while accessing the INTCS. For this reason, add a PM notifier to the SH7372 PM code and make it restore power to A4R before system suspend and remove power from all unused PM domains after system resume. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm diff --git a/arch/arm/mach-shmobile/pm-sh7372.c b/arch/arm/mach-shmobile/pm-sh7372.c index 7fda230..77b8fc1 100644 --- a/arch/arm/mach-shmobile/pm-sh7372.c +++ b/arch/arm/mach-shmobile/pm-sh7372.c @@ -239,7 +239,6 @@ struct sh7372_pm_domain sh7372_d4 = { struct sh7372_pm_domain sh7372_a4r = { .genpd.name = "A4R", .bit_shift = 5, - .gov = &pm_domain_always_on_gov, .suspend = sh7372_a4r_suspend, .resume = sh7372_intcs_resume, }; @@ -535,9 +534,37 @@ static int sh7372_enter_suspend(suspend_state_t suspend_state) return 0; } +/** + * sh7372_pm_notifier_fn - SH7372 PM notifier routine. + * @notifier: Unused. + * @pm_event: Event being handled. + * @unused: Unused. + */ +static int sh7372_pm_notifier_fn(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + switch (pm_event) { + case PM_SUSPEND_PREPARE: + /* + * This is necessary, because the A4R domain has to be "on" + * when suspend_device_irqs() and resume_device_irqs() are + * executed during system suspend and resume, respectively, so + * that those functions don't crash while accessing the INTCS. + */ + pm_genpd_poweron(&sh7372_a4r.genpd); + break; + case PM_POST_SUSPEND: + pm_genpd_poweroff_unused(); + break; + } + + return NOTIFY_DONE; +} + static void sh7372_suspend_init(void) { shmobile_suspend_ops.enter = sh7372_enter_suspend; + pm_notifier(sh7372_pm_notifier_fn, 0); } #else static void sh7372_suspend_init(void) {} -- cgit v0.10.2 From b3b73ec0d7fe5bf8f950232aa58dfa0416a62372 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 26 Dec 2011 00:29:55 +0100 Subject: PM / Freezer: fix return value of freezable_schedule_timeout_killable() ...it should return the return code from schedule_timeout_killable(), not the one from freezer_count(). All of the current callers ignore the return code so the bug is harmless but it's worth fixing. Signed-off-by: Jeff Layton Signed-off-by: Rafael J. Wysocki diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 7bcfe73..0ab54e1 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -116,9 +116,11 @@ static inline int freezer_should_skip(struct task_struct *p) /* Like schedule_timeout_killable(), but should not block the freezer. */ #define freezable_schedule_timeout_killable(timeout) \ ({ \ + long __retval; \ freezer_do_not_count(); \ - schedule_timeout_killable(timeout); \ + __retval = schedule_timeout_killable(timeout); \ freezer_count(); \ + __retval; \ }) /* -- cgit v0.10.2 From c336078bf65c4d38caa9a4b8b7b7261c778e622c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 27 Dec 2011 22:54:52 +0100 Subject: PM / Hibernate: Implement compat_ioctl for /dev/snapshot This allows uswsusp built for i386 to run on an x86_64 kernel (tested with Debian package version 1.0+20110509-2). References: http://bugs.debian.org/502816 Signed-off-by: Ben Hutchings Signed-off-by: Rafael J. Wysocki diff --git a/kernel/power/user.c b/kernel/power/user.c index 78bdb44..6b1ab7a 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -380,6 +381,66 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, return error; } +#ifdef CONFIG_COMPAT + +struct compat_resume_swap_area { + compat_loff_t offset; + u32 dev; +} __packed; + +static long +snapshot_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + BUILD_BUG_ON(sizeof(loff_t) != sizeof(compat_loff_t)); + + switch (cmd) { + case SNAPSHOT_GET_IMAGE_SIZE: + case SNAPSHOT_AVAIL_SWAP_SIZE: + case SNAPSHOT_ALLOC_SWAP_PAGE: { + compat_loff_t __user *uoffset = compat_ptr(arg); + loff_t offset; + mm_segment_t old_fs; + int err; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = snapshot_ioctl(file, cmd, (unsigned long) &offset); + set_fs(old_fs); + if (!err && put_user(offset, uoffset)) + err = -EFAULT; + return err; + } + + case SNAPSHOT_CREATE_IMAGE: + return snapshot_ioctl(file, cmd, + (unsigned long) compat_ptr(arg)); + + case SNAPSHOT_SET_SWAP_AREA: { + struct compat_resume_swap_area __user *u_swap_area = + compat_ptr(arg); + struct resume_swap_area swap_area; + mm_segment_t old_fs; + int err; + + err = get_user(swap_area.offset, &u_swap_area->offset); + err |= get_user(swap_area.dev, &u_swap_area->dev); + if (err) + return -EFAULT; + old_fs = get_fs(); + set_fs(KERNEL_DS); + err = snapshot_ioctl(file, SNAPSHOT_SET_SWAP_AREA, + (unsigned long) &swap_area); + set_fs(old_fs); + return err; + } + + default: + return snapshot_ioctl(file, cmd, arg); + } +} + +#endif /* CONFIG_COMPAT */ + static const struct file_operations snapshot_fops = { .open = snapshot_open, .release = snapshot_release, @@ -387,6 +448,9 @@ static const struct file_operations snapshot_fops = { .write = snapshot_write, .llseek = no_llseek, .unlocked_ioctl = snapshot_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = snapshot_compat_ioctl, +#endif }; static struct miscdevice snapshot_device = { -- cgit v0.10.2