diff options
33 files changed, 984 insertions, 1122 deletions
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index 421e7d0..c9abbd8 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -75,9 +75,6 @@ may need to apply in domain-specific ways to their devices: struct bus_type { ... int (*suspend)(struct device *dev, pm_message_t state); - int (*suspend_late)(struct device *dev, pm_message_t state); - - int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); }; @@ -226,20 +223,7 @@ The phases are seen by driver notifications issued in this order: This call should handle parts of device suspend logic that require sleeping. It probably does work to quiesce the device which hasn't - been abstracted into class.suspend() or bus.suspend_late(). - - 3 bus.suspend_late(dev, message) is called with IRQs disabled, and - with only one CPU active. Until the bus.resume_early() phase - completes (see later), IRQs are not enabled again. This method - won't be exposed by all busses; for message based busses like USB, - I2C, or SPI, device interactions normally require IRQs. This bus - call may be morphed into a driver call with bus-specific parameters. - - This call might save low level hardware state that might otherwise - be lost in the upcoming low power state, and actually put the - device into a low power state ... so that in some cases the device - may stay partly usable until this late. This "late" call may also - help when coping with hardware that behaves badly. + been abstracted into class.suspend(). The pm_message_t parameter is currently used to refine those semantics (described later). @@ -351,19 +335,11 @@ devices processing each phase's calls before the next phase begins. The phases are seen by driver notifications issued in this order: - 1 bus.resume_early(dev) is called with IRQs disabled, and with - only one CPU active. As with bus.suspend_late(), this method - won't be supported on busses that require IRQs in order to - interact with devices. - - This reverses the effects of bus.suspend_late(). - - 2 bus.resume(dev) is called next. This may be morphed into a device - driver call with bus-specific parameters; implementations may sleep. - - This reverses the effects of bus.suspend(). + 1 bus.resume(dev) reverses the effects of bus.suspend(). This may + be morphed into a device driver call with bus-specific parameters; + implementations may sleep. - 3 class.resume(dev) is called for devices associated with a class + 2 class.resume(dev) is called for devices associated with a class that has such a method. Implementations may sleep. This reverses the effects of class.suspend(), and would usually diff --git a/arch/alpha/include/asm/suspend.h b/arch/alpha/include/asm/suspend.h deleted file mode 100644 index c7042d5..0000000 --- a/arch/alpha/include/asm/suspend.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ALPHA_SUSPEND_H -#define __ALPHA_SUSPEND_H - -/* Dummy include. */ - -#endif /* __ALPHA_SUSPEND_H */ diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h deleted file mode 100644 index cf0d0bd..0000000 --- a/arch/arm/include/asm/suspend.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _ASMARM_SUSPEND_H -#define _ASMARM_SUSPEND_H - -#endif diff --git a/arch/ia64/include/asm/suspend.h b/arch/ia64/include/asm/suspend.h deleted file mode 100644 index b05bbb6..0000000 --- a/arch/ia64/include/asm/suspend.h +++ /dev/null @@ -1 +0,0 @@ -/* dummy (must be non-empty to prevent prejudicial removal...) */ diff --git a/arch/m68k/include/asm/suspend.h b/arch/m68k/include/asm/suspend.h deleted file mode 100644 index 57b3ddb..0000000 --- a/arch/m68k/include/asm/suspend.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _M68K_SUSPEND_H -#define _M68K_SUSPEND_H - -/* Dummy include. */ - -#endif /* _M68K_SUSPEND_H */ diff --git a/arch/mips/include/asm/suspend.h b/arch/mips/include/asm/suspend.h deleted file mode 100644 index 2562f8f..0000000 --- a/arch/mips/include/asm/suspend.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SUSPEND_H -#define __ASM_SUSPEND_H - -/* Somewhen... Maybe :-) */ - -#endif /* __ASM_SUSPEND_H */ diff --git a/arch/s390/include/asm/suspend.h b/arch/s390/include/asm/suspend.h deleted file mode 100644 index 1f34580..0000000 --- a/arch/s390/include/asm/suspend.h +++ /dev/null @@ -1,5 +0,0 @@ -#ifndef __ASM_S390_SUSPEND_H -#define __ASM_S390_SUSPEND_H - -#endif - diff --git a/arch/um/include/asm/suspend.h b/arch/um/include/asm/suspend.h deleted file mode 100644 index f4e8e00..0000000 --- a/arch/um/include/asm/suspend.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __UM_SUSPEND_H -#define __UM_SUSPEND_H - -#endif diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 7c243a2..ca93638 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -104,7 +104,7 @@ int acpi_save_state_mem(void) initial_gs = per_cpu_offset(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; - saved_magic = 0x123456789abcdef0; + saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ return 0; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 49e0939..79302e9 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1233,9 +1233,9 @@ static int suspend(int vetoable) int err; struct apm_user *as; - device_suspend(PMSG_SUSPEND); + dpm_suspend_start(PMSG_SUSPEND); - device_power_down(PMSG_SUSPEND); + dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); sysdev_suspend(PMSG_SUSPEND); @@ -1259,9 +1259,9 @@ static int suspend(int vetoable) sysdev_resume(); local_irq_enable(); - device_power_up(PMSG_RESUME); + dpm_resume_noirq(PMSG_RESUME); - device_resume(PMSG_RESUME); + dpm_resume_end(PMSG_RESUME); queue_event(APM_NORMAL_RESUME, NULL); spin_lock(&user_list_lock); for (as = user_list; as != NULL; as = as->next) { @@ -1277,7 +1277,7 @@ static void standby(void) { int err; - device_power_down(PMSG_SUSPEND); + dpm_suspend_noirq(PMSG_SUSPEND); local_irq_disable(); sysdev_suspend(PMSG_SUSPEND); @@ -1291,7 +1291,7 @@ static void standby(void) sysdev_resume(); local_irq_enable(); - device_power_up(PMSG_RESUME); + dpm_resume_noirq(PMSG_RESUME); } static apm_event_t get_event(void) @@ -1376,7 +1376,7 @@ static void check_events(void) ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - device_resume(PMSG_RESUME); + dpm_resume_end(PMSG_RESUME); queue_event(event, NULL); } ignore_normal_resume = 0; diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index 58b32db..de2abbd 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile @@ -3,5 +3,5 @@ nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_cpu_$(BITS).o := $(nostackp) -obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o +obj-$(CONFIG_PM_SLEEP) += cpu.o obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu.c index 5343540..d277ef1 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu.c @@ -1,5 +1,5 @@ /* - * Suspend and hibernation support for x86-64 + * Suspend support specific for i386/x86-64. * * Distribute under GPLv2 * @@ -8,18 +8,28 @@ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> */ -#include <linux/smp.h> #include <linux/suspend.h> -#include <asm/proto.h> -#include <asm/page.h> +#include <linux/smp.h> + #include <asm/pgtable.h> +#include <asm/proto.h> #include <asm/mtrr.h> +#include <asm/page.h> +#include <asm/mce.h> #include <asm/xcr.h> #include <asm/suspend.h> -static void fix_processor_context(void); +#ifdef CONFIG_X86_32 +static struct saved_context saved_context; +unsigned long saved_context_ebx; +unsigned long saved_context_esp, saved_context_ebp; +unsigned long saved_context_esi, saved_context_edi; +unsigned long saved_context_eflags; +#else +/* CONFIG_X86_64 */ struct saved_context saved_context; +#endif /** * __save_processor_state - save CPU registers before creating a @@ -38,19 +48,35 @@ struct saved_context saved_context; */ static void __save_processor_state(struct saved_context *ctxt) { +#ifdef CONFIG_X86_32 + mtrr_save_fixed_ranges(NULL); +#endif kernel_fpu_begin(); /* * descriptor tables */ +#ifdef CONFIG_X86_32 + store_gdt(&ctxt->gdt); + store_idt(&ctxt->idt); +#else +/* CONFIG_X86_64 */ store_gdt((struct desc_ptr *)&ctxt->gdt_limit); store_idt((struct desc_ptr *)&ctxt->idt_limit); +#endif store_tr(ctxt->tr); /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ /* * segment registers */ +#ifdef CONFIG_X86_32 + savesegment(es, ctxt->es); + savesegment(fs, ctxt->fs); + savesegment(gs, ctxt->gs); + savesegment(ss, ctxt->ss); +#else +/* CONFIG_X86_64 */ asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); @@ -62,30 +88,87 @@ static void __save_processor_state(struct saved_context *ctxt) rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); mtrr_save_fixed_ranges(NULL); + rdmsrl(MSR_EFER, ctxt->efer); +#endif + /* * control registers */ - rdmsrl(MSR_EFER, ctxt->efer); ctxt->cr0 = read_cr0(); ctxt->cr2 = read_cr2(); ctxt->cr3 = read_cr3(); +#ifdef CONFIG_X86_32 + ctxt->cr4 = read_cr4_safe(); +#else +/* CONFIG_X86_64 */ ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); +#endif } +/* Needed by apm.c */ void save_processor_state(void) { __save_processor_state(&saved_context); } +#ifdef CONFIG_X86_32 +EXPORT_SYMBOL(save_processor_state); +#endif static void do_fpu_end(void) { /* - * Restore FPU regs if necessary + * Restore FPU regs if necessary. */ kernel_fpu_end(); } +static void fix_processor_context(void) +{ + int cpu = smp_processor_id(); + struct tss_struct *t = &per_cpu(init_tss, cpu); + + set_tss_desc(cpu, t); /* + * This just modifies memory; should not be + * necessary. But... This is necessary, because + * 386 hardware has concept of busy TSS or some + * similar stupidity. + */ + +#ifdef CONFIG_X86_64 + get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; + + syscall_init(); /* This sets MSR_*STAR and related */ +#endif + load_TR_desc(); /* This does ltr */ + load_LDT(¤t->active_mm->context); /* This does lldt */ + + /* + * Now maybe reload the debug registers + */ + if (current->thread.debugreg7) { +#ifdef CONFIG_X86_32 + set_debugreg(current->thread.debugreg0, 0); + set_debugreg(current->thread.debugreg1, 1); + set_debugreg(current->thread.debugreg2, 2); + set_debugreg(current->thread.debugreg3, 3); + /* no 4 and 5 */ + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(current->thread.debugreg7, 7); +#else + /* CONFIG_X86_64 */ + loaddebug(¤t->thread, 0); + loaddebug(¤t->thread, 1); + loaddebug(¤t->thread, 2); + loaddebug(¤t->thread, 3); + /* no 4 and 5 */ + loaddebug(¤t->thread, 6); + loaddebug(¤t->thread, 7); +#endif + } + +} + /** * __restore_processor_state - restore the contents of CPU registers saved * by __save_processor_state() @@ -96,9 +179,16 @@ static void __restore_processor_state(struct saved_context *ctxt) /* * control registers */ + /* cr4 was introduced in the Pentium CPU */ +#ifdef CONFIG_X86_32 + if (ctxt->cr4) + write_cr4(ctxt->cr4); +#else +/* CONFIG X86_64 */ wrmsrl(MSR_EFER, ctxt->efer); write_cr8(ctxt->cr8); write_cr4(ctxt->cr4); +#endif write_cr3(ctxt->cr3); write_cr2(ctxt->cr2); write_cr0(ctxt->cr0); @@ -107,13 +197,31 @@ static void __restore_processor_state(struct saved_context *ctxt) * now restore the descriptor tables to their proper values * ltr is done i fix_processor_context(). */ +#ifdef CONFIG_X86_32 + load_gdt(&ctxt->gdt); + load_idt(&ctxt->idt); +#else +/* CONFIG_X86_64 */ load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); load_idt((const struct desc_ptr *)&ctxt->idt_limit); - +#endif /* * segment registers */ +#ifdef CONFIG_X86_32 + loadsegment(es, ctxt->es); + loadsegment(fs, ctxt->fs); + loadsegment(gs, ctxt->gs); + loadsegment(ss, ctxt->ss); + + /* + * sysenter MSRs + */ + if (boot_cpu_has(X86_FEATURE_SEP)) + enable_sep_cpu(); +#else +/* CONFIG_X86_64 */ asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); @@ -123,6 +231,7 @@ static void __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_FS_BASE, ctxt->fs_base); wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); +#endif /* * restore XCR0 for xsave capable cpu's. @@ -134,41 +243,17 @@ static void __restore_processor_state(struct saved_context *ctxt) do_fpu_end(); mtrr_ap_init(); + +#ifdef CONFIG_X86_32 + mcheck_init(&boot_cpu_data); +#endif } +/* Needed by apm.c */ void restore_processor_state(void) { __restore_processor_state(&saved_context); } - -static void fix_processor_context(void) -{ - int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - - /* - * This just modifies memory; should not be necessary. But... This - * is necessary, because 386 hardware has concept of busy TSS or some - * similar stupidity. - */ - set_tss_desc(cpu, t); - - get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; - - syscall_init(); /* This sets MSR_*STAR and related */ - load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg7){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); - } -} +#ifdef CONFIG_X86_32 +EXPORT_SYMBOL(restore_processor_state); +#endif diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c deleted file mode 100644 index ce702c5..0000000 --- a/arch/x86/power/cpu_32.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Suspend support specific for i386. - * - * Distribute under GPLv2 - * - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> - * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> - */ - -#include <linux/module.h> -#include <linux/suspend.h> -#include <asm/mtrr.h> -#include <asm/mce.h> -#include <asm/xcr.h> -#include <asm/suspend.h> - -static struct saved_context saved_context; - -unsigned long saved_context_ebx; -unsigned long saved_context_esp, saved_context_ebp; -unsigned long saved_context_esi, saved_context_edi; -unsigned long saved_context_eflags; - -static void __save_processor_state(struct saved_context *ctxt) -{ - mtrr_save_fixed_ranges(NULL); - kernel_fpu_begin(); - - /* - * descriptor tables - */ - store_gdt(&ctxt->gdt); - store_idt(&ctxt->idt); - store_tr(ctxt->tr); - - /* - * segment registers - */ - savesegment(es, ctxt->es); - savesegment(fs, ctxt->fs); - savesegment(gs, ctxt->gs); - savesegment(ss, ctxt->ss); - - /* - * control registers - */ - ctxt->cr0 = read_cr0(); - ctxt->cr2 = read_cr2(); - ctxt->cr3 = read_cr3(); - ctxt->cr4 = read_cr4_safe(); -} - -/* Needed by apm.c */ -void save_processor_state(void) -{ - __save_processor_state(&saved_context); -} -EXPORT_SYMBOL(save_processor_state); - -static void do_fpu_end(void) -{ - /* - * Restore FPU regs if necessary. - */ - kernel_fpu_end(); -} - -static void fix_processor_context(void) -{ - int cpu = smp_processor_id(); - struct tss_struct *t = &per_cpu(init_tss, cpu); - - set_tss_desc(cpu, t); /* - * This just modifies memory; should not be - * necessary. But... This is necessary, because - * 386 hardware has concept of busy TSS or some - * similar stupidity. - */ - - load_TR_desc(); /* This does ltr */ - load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - if (current->thread.debugreg7) { - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } - -} - -static void __restore_processor_state(struct saved_context *ctxt) -{ - /* - * control registers - */ - /* cr4 was introduced in the Pentium CPU */ - if (ctxt->cr4) - write_cr4(ctxt->cr4); - write_cr3(ctxt->cr3); - write_cr2(ctxt->cr2); - write_cr0(ctxt->cr0); - - /* - * now restore the descriptor tables to their proper values - * ltr is done i fix_processor_context(). - */ - load_gdt(&ctxt->gdt); - load_idt(&ctxt->idt); - - /* - * segment registers - */ - loadsegment(es, ctxt->es); - loadsegment(fs, ctxt->fs); - loadsegment(gs, ctxt->gs); - loadsegment(ss, ctxt->ss); - - /* - * sysenter MSRs - */ - if (boot_cpu_has(X86_FEATURE_SEP)) - enable_sep_cpu(); - - /* - * restore XCR0 for xsave capable cpu's. - */ - if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); - - fix_processor_context(); - do_fpu_end(); - mtrr_ap_init(); - mcheck_init(&boot_cpu_data); -} - -/* Needed by apm.c */ -void restore_processor_state(void) -{ - __restore_processor_state(&saved_context); -} -EXPORT_SYMBOL(restore_processor_state); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 8b4708e..ead3f64 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -469,22 +469,6 @@ static void platform_drv_shutdown(struct device *_dev) drv->shutdown(dev); } -static int platform_drv_suspend(struct device *_dev, pm_message_t state) -{ - struct platform_driver *drv = to_platform_driver(_dev->driver); - struct platform_device *dev = to_platform_device(_dev); - - return drv->suspend(dev, state); -} - -static int platform_drv_resume(struct device *_dev) -{ - struct platform_driver *drv = to_platform_driver(_dev->driver); - struct platform_device *dev = to_platform_device(_dev); - - return drv->resume(dev); -} - /** * platform_driver_register * @drv: platform driver structure @@ -498,10 +482,10 @@ int platform_driver_register(struct platform_driver *drv) drv->driver.remove = platform_drv_remove; if (drv->shutdown) drv->driver.shutdown = platform_drv_shutdown; - if (drv->suspend) - drv->driver.suspend = platform_drv_suspend; - if (drv->resume) - drv->driver.resume = platform_drv_resume; + if (drv->suspend || drv->resume) + pr_warning("Platform driver '%s' needs updating - please use " + "dev_pm_ops\n", drv->driver.name); + return driver_register(&drv->driver); } EXPORT_SYMBOL_GPL(platform_driver_register); @@ -633,10 +617,12 @@ static int platform_match(struct device *dev, struct device_driver *drv) static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) { + struct platform_driver *pdrv = to_platform_driver(dev->driver); + struct platform_device *pdev = to_platform_device(dev); int ret = 0; - if (dev->driver && dev->driver->suspend) - ret = dev->driver->suspend(dev, mesg); + if (dev->driver && pdrv->suspend) + ret = pdrv->suspend(pdev, mesg); return ret; } @@ -667,10 +653,12 @@ static int platform_legacy_resume_early(struct device *dev) static int platform_legacy_resume(struct device *dev) { + struct platform_driver *pdrv = to_platform_driver(dev->driver); + struct platform_device *pdev = to_platform_device(dev); int ret = 0; - if (dev->driver && dev->driver->resume) - ret = dev->driver->resume(dev); + if (dev->driver && pdrv->resume) + ret = pdrv->resume(pdev); return ret; } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 3e4bc69..fae7254 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -315,13 +315,13 @@ static void pm_dev_err(struct device *dev, pm_message_t state, char *info, /*------------------------- Resume routines -------------------------*/ /** - * resume_device_noirq - Power on one device (early resume). + * device_resume_noirq - Power on one device (early resume). * @dev: Device. * @state: PM transition of the system being carried out. * * Must be called with interrupts disabled. */ -static int resume_device_noirq(struct device *dev, pm_message_t state) +static int device_resume_noirq(struct device *dev, pm_message_t state) { int error = 0; @@ -334,9 +334,6 @@ static int resume_device_noirq(struct device *dev, pm_message_t state) if (dev->bus->pm) { pm_dev_dbg(dev, state, "EARLY "); error = pm_noirq_op(dev, dev->bus->pm, state); - } else if (dev->bus->resume_early) { - pm_dev_dbg(dev, state, "legacy EARLY "); - error = dev->bus->resume_early(dev); } End: TRACE_RESUME(error); @@ -344,16 +341,16 @@ static int resume_device_noirq(struct device *dev, pm_message_t state) } /** - * dpm_power_up - Power on all regular (non-sysdev) devices. + * dpm_resume_noirq - Power on all regular (non-sysdev) devices. * @state: PM transition of the system being carried out. * - * Execute the appropriate "noirq resume" callback for all devices marked - * as DPM_OFF_IRQ. + * Call the "noirq" resume handlers for all devices marked as + * DPM_OFF_IRQ and enable device drivers to receive interrupts. * * Must be called under dpm_list_mtx. Device drivers should not receive * interrupts while it's being executed. */ -static void dpm_power_up(pm_message_t state) +void dpm_resume_noirq(pm_message_t state) { struct device *dev; @@ -363,33 +360,21 @@ static void dpm_power_up(pm_message_t state) int error; dev->power.status = DPM_OFF; - error = resume_device_noirq(dev, state); + error = device_resume_noirq(dev, state); if (error) pm_dev_err(dev, state, " early", error); } mutex_unlock(&dpm_list_mtx); -} - -/** - * device_power_up - Turn on all devices that need special attention. - * @state: PM transition of the system being carried out. - * - * Call the "early" resume handlers and enable device drivers to receive - * interrupts. - */ -void device_power_up(pm_message_t state) -{ - dpm_power_up(state); resume_device_irqs(); } -EXPORT_SYMBOL_GPL(device_power_up); +EXPORT_SYMBOL_GPL(dpm_resume_noirq); /** - * resume_device - Restore state for one device. + * device_resume - Restore state for one device. * @dev: Device. * @state: PM transition of the system being carried out. */ -static int resume_device(struct device *dev, pm_message_t state) +static int device_resume(struct device *dev, pm_message_t state) { int error = 0; @@ -414,9 +399,6 @@ static int resume_device(struct device *dev, pm_message_t state) if (dev->type->pm) { pm_dev_dbg(dev, state, "type "); error = pm_op(dev, dev->type->pm, state); - } else if (dev->type->resume) { - pm_dev_dbg(dev, state, "legacy type "); - error = dev->type->resume(dev); } if (error) goto End; @@ -462,7 +444,7 @@ static void dpm_resume(pm_message_t state) dev->power.status = DPM_RESUMING; mutex_unlock(&dpm_list_mtx); - error = resume_device(dev, state); + error = device_resume(dev, state); mutex_lock(&dpm_list_mtx); if (error) @@ -480,11 +462,11 @@ static void dpm_resume(pm_message_t state) } /** - * complete_device - Complete a PM transition for given device + * device_complete - Complete a PM transition for given device * @dev: Device. * @state: PM transition of the system being carried out. */ -static void complete_device(struct device *dev, pm_message_t state) +static void device_complete(struct device *dev, pm_message_t state) { down(&dev->sem); @@ -527,7 +509,7 @@ static void dpm_complete(pm_message_t state) dev->power.status = DPM_ON; mutex_unlock(&dpm_list_mtx); - complete_device(dev, state); + device_complete(dev, state); mutex_lock(&dpm_list_mtx); } @@ -540,19 +522,19 @@ static void dpm_complete(pm_message_t state) } /** - * device_resume - Restore state of each device in system. + * dpm_resume_end - Restore state of each device in system. * @state: PM transition of the system being carried out. * * Resume all the devices, unlock them all, and allow new * devices to be registered once again. */ -void device_resume(pm_message_t state) +void dpm_resume_end(pm_message_t state) { might_sleep(); dpm_resume(state); dpm_complete(state); } -EXPORT_SYMBOL_GPL(device_resume); +EXPORT_SYMBOL_GPL(dpm_resume_end); /*------------------------- Suspend routines -------------------------*/ @@ -577,13 +559,13 @@ static pm_message_t resume_event(pm_message_t sleep_state) } /** - * suspend_device_noirq - Shut down one device (late suspend). + * device_suspend_noirq - Shut down one device (late suspend). * @dev: Device. * @state: PM transition of the system being carried out. * * This is called with interrupts off and only a single CPU running. */ -static int suspend_device_noirq(struct device *dev, pm_message_t state) +static int device_suspend_noirq(struct device *dev, pm_message_t state) { int error = 0; @@ -593,24 +575,20 @@ static int suspend_device_noirq(struct device *dev, pm_message_t state) if (dev->bus->pm) { pm_dev_dbg(dev, state, "LATE "); error = pm_noirq_op(dev, dev->bus->pm, state); - } else if (dev->bus->suspend_late) { - pm_dev_dbg(dev, state, "legacy LATE "); - error = dev->bus->suspend_late(dev, state); - suspend_report_result(dev->bus->suspend_late, error); } return error; } /** - * device_power_down - Shut down special devices. + * dpm_suspend_noirq - Power down all regular (non-sysdev) devices. * @state: PM transition of the system being carried out. * - * Prevent device drivers from receiving interrupts and call the "late" + * Prevent device drivers from receiving interrupts and call the "noirq" * suspend handlers. * * Must be called under dpm_list_mtx. */ -int device_power_down(pm_message_t state) +int dpm_suspend_noirq(pm_message_t state) { struct device *dev; int error = 0; @@ -618,7 +596,7 @@ int device_power_down(pm_message_t state) suspend_device_irqs(); mutex_lock(&dpm_list_mtx); list_for_each_entry_reverse(dev, &dpm_list, power.entry) { - error = suspend_device_noirq(dev, state); + error = device_suspend_noirq(dev, state); if (error) { pm_dev_err(dev, state, " late", error); break; @@ -627,17 +605,17 @@ int device_power_down(pm_message_t state) } mutex_unlock(&dpm_list_mtx); if (error) - device_power_up(resume_event(state)); + dpm_resume_noirq(resume_event(state)); return error; } -EXPORT_SYMBOL_GPL(device_power_down); +EXPORT_SYMBOL_GPL(dpm_suspend_noirq); /** - * suspend_device - Save state of one device. + * device_suspend - Save state of one device. * @dev: Device. * @state: PM transition of the system being carried out. */ -static int suspend_device(struct device *dev, pm_message_t state) +static int device_suspend(struct device *dev, pm_message_t state) { int error = 0; @@ -660,10 +638,6 @@ static int suspend_device(struct device *dev, pm_message_t state) if (dev->type->pm) { pm_dev_dbg(dev, state, "type "); error = pm_op(dev, dev->type->pm, state); - } else if (dev->type->suspend) { - pm_dev_dbg(dev, state, "legacy type "); - error = dev->type->suspend(dev, state); - suspend_report_result(dev->type->suspend, error); } if (error) goto End; @@ -704,7 +678,7 @@ static int dpm_suspend(pm_message_t state) get_device(dev); mutex_unlock(&dpm_list_mtx); - error = suspend_device(dev, state); + error = device_suspend(dev, state); mutex_lock(&dpm_list_mtx); if (error) { @@ -723,11 +697,11 @@ static int dpm_suspend(pm_message_t state) } /** - * prepare_device - Execute the ->prepare() callback(s) for given device. + * device_prepare - Execute the ->prepare() callback(s) for given device. * @dev: Device. * @state: PM transition of the system being carried out. */ -static int prepare_device(struct device *dev, pm_message_t state) +static int device_prepare(struct device *dev, pm_message_t state) { int error = 0; @@ -781,7 +755,7 @@ static int dpm_prepare(pm_message_t state) dev->power.status = DPM_PREPARING; mutex_unlock(&dpm_list_mtx); - error = prepare_device(dev, state); + error = device_prepare(dev, state); mutex_lock(&dpm_list_mtx); if (error) { @@ -807,12 +781,12 @@ static int dpm_prepare(pm_message_t state) } /** - * device_suspend - Save state and stop all devices in system. + * dpm_suspend_start - Save state and stop all devices in system. * @state: PM transition of the system being carried out. * * Prepare and suspend all devices. */ -int device_suspend(pm_message_t state) +int dpm_suspend_start(pm_message_t state) { int error; @@ -822,7 +796,7 @@ int device_suspend(pm_message_t state) error = dpm_suspend(state); return error; } -EXPORT_SYMBOL_GPL(device_suspend); +EXPORT_SYMBOL_GPL(dpm_suspend_start); void __suspend_report_result(const char *function, void *fn, int ret) { diff --git a/drivers/base/sys.c b/drivers/base/sys.c index 3236b43..9742a78 100644 --- a/drivers/base/sys.c +++ b/drivers/base/sys.c @@ -343,11 +343,15 @@ static void __sysdev_resume(struct sys_device *dev) /* First, call the class-specific one */ if (cls->resume) cls->resume(dev); + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled after %pF\n", cls->resume); /* Call auxillary drivers next. */ list_for_each_entry(drv, &cls->drivers, entry) { if (drv->resume) drv->resume(dev); + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled after %pF\n", drv->resume); } } @@ -377,6 +381,9 @@ int sysdev_suspend(pm_message_t state) if (ret) return ret; + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled while suspending system devices\n"); + pr_debug("Suspending System Devices\n"); list_for_each_entry_reverse(cls, &system_kset->list, kset.kobj.entry) { @@ -393,6 +400,9 @@ int sysdev_suspend(pm_message_t state) if (ret) goto aux_driver; } + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled after %pF\n", + drv->suspend); } /* Now call the generic one */ @@ -400,6 +410,9 @@ int sysdev_suspend(pm_message_t state) ret = cls->suspend(sysdev, state); if (ret) goto cls_driver; + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled after %pF\n", + cls->suspend); } } } @@ -452,6 +465,9 @@ int sysdev_resume(void) { struct sysdev_class *cls; + WARN_ONCE(!irqs_disabled(), + "Interrupts enabled while resuming system devices\n"); + pr_debug("Resuming System Devices\n"); list_for_each_entry(cls, &system_kset->list, kset.kobj.entry) { diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index fddc202..10d03d7 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -43,7 +43,7 @@ static int xen_suspend(void *data) if (err) { printk(KERN_ERR "xen_suspend: sysdev_suspend failed: %d\n", err); - device_power_up(PMSG_RESUME); + dpm_resume_noirq(PMSG_RESUME); return err; } @@ -69,7 +69,7 @@ static int xen_suspend(void *data) } sysdev_resume(); - device_power_up(PMSG_RESUME); + dpm_resume_noirq(PMSG_RESUME); return 0; } @@ -92,18 +92,18 @@ static void do_suspend(void) } #endif - err = device_suspend(PMSG_SUSPEND); + err = dpm_suspend_start(PMSG_SUSPEND); if (err) { - printk(KERN_ERR "xen suspend: device_suspend %d\n", err); + printk(KERN_ERR "xen suspend: dpm_suspend_start %d\n", err); goto out; } printk(KERN_DEBUG "suspending xenstore...\n"); xs_suspend(); - err = device_power_down(PMSG_SUSPEND); + err = dpm_suspend_noirq(PMSG_SUSPEND); if (err) { - printk(KERN_ERR "device_power_down failed: %d\n", err); + printk(KERN_ERR "dpm_suspend_noirq failed: %d\n", err); goto resume_devices; } @@ -119,10 +119,10 @@ static void do_suspend(void) } else xs_suspend_cancel(); - device_power_up(PMSG_RESUME); + dpm_resume_noirq(PMSG_RESUME); resume_devices: - device_resume(PMSG_RESUME); + dpm_resume_end(PMSG_RESUME); /* Make sure timer events get retriggered on all CPUs */ clock_was_set(); diff --git a/include/linux/device.h b/include/linux/device.h index 5d5c197..a4a7b10 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -62,8 +62,6 @@ struct bus_type { void (*shutdown)(struct device *dev); int (*suspend)(struct device *dev, pm_message_t state); - int (*suspend_late)(struct device *dev, pm_message_t state); - int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); struct dev_pm_ops *pm; @@ -291,9 +289,6 @@ struct device_type { int (*uevent)(struct device *dev, struct kobj_uevent_env *env); void (*release)(struct device *dev); - int (*suspend)(struct device *dev, pm_message_t state); - int (*resume)(struct device *dev); - struct dev_pm_ops *pm; }; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ff374ce..c41e812 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -183,6 +183,7 @@ extern void disable_irq(unsigned int irq); extern void enable_irq(unsigned int irq); /* The following three functions are for the core kernel use only. */ +#ifdef CONFIG_GENERIC_HARDIRQS extern void suspend_device_irqs(void); extern void resume_device_irqs(void); #ifdef CONFIG_PM_SLEEP @@ -190,6 +191,11 @@ extern int check_wakeup_irqs(void); #else static inline int check_wakeup_irqs(void) { return 0; } #endif +#else +static inline void suspend_device_irqs(void) { }; +static inline void resume_device_irqs(void) { }; +static inline int check_wakeup_irqs(void) { return 0; } +#endif #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) diff --git a/include/linux/pm.h b/include/linux/pm.h index 1d4e2d2..b3f7476 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -382,14 +382,13 @@ struct dev_pm_info { #ifdef CONFIG_PM_SLEEP extern void device_pm_lock(void); extern int sysdev_resume(void); -extern void device_power_up(pm_message_t state); -extern void device_resume(pm_message_t state); +extern void dpm_resume_noirq(pm_message_t state); +extern void dpm_resume_end(pm_message_t state); extern void device_pm_unlock(void); extern int sysdev_suspend(pm_message_t state); -extern int device_power_down(pm_message_t state); -extern int device_suspend(pm_message_t state); -extern int device_prepare_suspend(pm_message_t state); +extern int dpm_suspend_noirq(pm_message_t state); +extern int dpm_suspend_start(pm_message_t state); extern void __suspend_report_result(const char *function, void *fn, int ret); @@ -403,7 +402,7 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); #define device_pm_lock() do {} while (0) #define device_pm_unlock() do {} while (0) -static inline int device_suspend(pm_message_t state) +static inline int dpm_suspend_start(pm_message_t state) { return 0; } diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 795032e..cd15df6 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -245,11 +245,6 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern void hibernation_set_ops(struct platform_hibernation_ops *ops); extern int hibernate(void); -extern int hibernate_nvs_register(unsigned long start, unsigned long size); -extern int hibernate_nvs_alloc(void); -extern void hibernate_nvs_free(void); -extern void hibernate_nvs_save(void); -extern void hibernate_nvs_restore(void); extern bool system_entering_hibernation(void); #else /* CONFIG_HIBERNATION */ static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } @@ -258,6 +253,16 @@ static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(struct platform_hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } +static inline bool system_entering_hibernation(void) { return false; } +#endif /* CONFIG_HIBERNATION */ + +#ifdef CONFIG_HIBERNATION_NVS +extern int hibernate_nvs_register(unsigned long start, unsigned long size); +extern int hibernate_nvs_alloc(void); +extern void hibernate_nvs_free(void); +extern void hibernate_nvs_save(void); +extern void hibernate_nvs_restore(void); +#else /* CONFIG_HIBERNATION_NVS */ static inline int hibernate_nvs_register(unsigned long a, unsigned long b) { return 0; @@ -266,8 +271,7 @@ static inline int hibernate_nvs_alloc(void) { return 0; } static inline void hibernate_nvs_free(void) {} static inline void hibernate_nvs_save(void) {} static inline void hibernate_nvs_restore(void) {} -static inline bool system_entering_hibernation(void) { return false; } -#endif /* CONFIG_HIBERNATION */ +#endif /* CONFIG_HIBERNATION_NVS */ #ifdef CONFIG_PM_SLEEP void save_processor_state(void); diff --git a/kernel/kexec.c b/kernel/kexec.c index e498377..ae1c352 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1448,17 +1448,17 @@ int kernel_kexec(void) goto Restore_console; } suspend_console(); - error = device_suspend(PMSG_FREEZE); + error = dpm_suspend_start(PMSG_FREEZE); if (error) goto Resume_console; - /* At this point, device_suspend() has been called, - * but *not* device_power_down(). We *must* - * device_power_down() now. Otherwise, drivers for + /* At this point, dpm_suspend_start() has been called, + * but *not* dpm_suspend_noirq(). We *must* call + * dpm_suspend_noirq() now. Otherwise, drivers for * some devices (e.g. interrupt controllers) become * desynchronized with the actual state of the * hardware at resume time, and evil weirdness ensues. */ - error = device_power_down(PMSG_FREEZE); + error = dpm_suspend_noirq(PMSG_FREEZE); if (error) goto Resume_devices; error = disable_nonboot_cpus(); @@ -1486,9 +1486,9 @@ int kernel_kexec(void) local_irq_enable(); Enable_cpus: enable_nonboot_cpus(); - device_power_up(PMSG_RESTORE); + dpm_resume_noirq(PMSG_RESTORE); Resume_devices: - device_resume(PMSG_RESTORE); + dpm_resume_end(PMSG_RESTORE); Resume_console: resume_console(); thaw_processes(); diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 23bd4da..72067cb 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -116,9 +116,13 @@ config SUSPEND_FREEZER Turning OFF this setting is NOT recommended! If in doubt, say Y. +config HIBERNATION_NVS + bool + config HIBERNATION bool "Hibernation (aka 'suspend to disk')" depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE + select HIBERNATION_NVS if HAS_IOMEM ---help--- Enable the suspend to disk (STD) functionality, which is usually called "hibernation" in user interfaces. STD checkpoints the diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 720ea4f..c3b81c3 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -6,6 +6,9 @@ endif obj-$(CONFIG_PM) += main.o obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o -obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o +obj-$(CONFIG_SUSPEND) += suspend.o +obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o +obj-$(CONFIG_HIBERNATION) += swsusp.o hibernate.o snapshot.o swap.o user.o +obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o diff --git a/kernel/power/disk.c b/kernel/power/hibernate.c index 5cb080e..81d2e74 100644 --- a/kernel/power/disk.c +++ b/kernel/power/hibernate.c @@ -1,12 +1,12 @@ /* - * kernel/power/disk.c - Suspend-to-disk support. + * kernel/power/hibernate.c - Hibernation (a.k.a suspend-to-disk) support. * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab * Copyright (c) 2004 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc. * * This file is released under the GPLv2. - * */ #include <linux/suspend.h> @@ -215,13 +215,13 @@ static int create_image(int platform_mode) if (error) return error; - /* At this point, device_suspend() has been called, but *not* - * device_power_down(). We *must* call device_power_down() now. + /* At this point, dpm_suspend_start() has been called, but *not* + * dpm_suspend_noirq(). We *must* call dpm_suspend_noirq() now. * Otherwise, drivers for some devices (e.g. interrupt controllers) * become desynchronized with the actual state of the hardware * at resume time, and evil weirdness ensues. */ - error = device_power_down(PMSG_FREEZE); + error = dpm_suspend_noirq(PMSG_FREEZE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); @@ -262,7 +262,7 @@ static int create_image(int platform_mode) Power_up: sysdev_resume(); - /* NOTE: device_power_up() is just a resume() for devices + /* NOTE: dpm_resume_noirq() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ @@ -275,7 +275,7 @@ static int create_image(int platform_mode) Platform_finish: platform_finish(platform_mode); - device_power_up(in_suspend ? + dpm_resume_noirq(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); return error; @@ -304,7 +304,7 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); - error = device_suspend(PMSG_FREEZE); + error = dpm_suspend_start(PMSG_FREEZE); if (error) goto Recover_platform; @@ -315,7 +315,7 @@ int hibernation_snapshot(int platform_mode) /* Control returns here after successful restore */ Resume_devices: - device_resume(in_suspend ? + dpm_resume_end(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); resume_console(); Close: @@ -339,7 +339,7 @@ static int resume_target_kernel(bool platform_mode) { int error; - error = device_power_down(PMSG_QUIESCE); + error = dpm_suspend_noirq(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); @@ -394,7 +394,7 @@ static int resume_target_kernel(bool platform_mode) Cleanup: platform_restore_cleanup(platform_mode); - device_power_up(PMSG_RECOVER); + dpm_resume_noirq(PMSG_RECOVER); return error; } @@ -414,10 +414,10 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); suspend_console(); - error = device_suspend(PMSG_QUIESCE); + error = dpm_suspend_start(PMSG_QUIESCE); if (!error) { error = resume_target_kernel(platform_mode); - device_resume(PMSG_RECOVER); + dpm_resume_end(PMSG_RECOVER); } resume_console(); pm_restore_console(); @@ -447,14 +447,14 @@ int hibernation_platform_enter(void) entering_platform_hibernation = true; suspend_console(); - error = device_suspend(PMSG_HIBERNATE); + error = dpm_suspend_start(PMSG_HIBERNATE); if (error) { if (hibernation_ops->recover) hibernation_ops->recover(); goto Resume_devices; } - error = device_power_down(PMSG_HIBERNATE); + error = dpm_suspend_noirq(PMSG_HIBERNATE); if (error) goto Resume_devices; @@ -479,11 +479,11 @@ int hibernation_platform_enter(void) Platofrm_finish: hibernation_ops->finish(); - device_power_up(PMSG_RESTORE); + dpm_suspend_noirq(PMSG_RESTORE); Resume_devices: entering_platform_hibernation = false; - device_resume(PMSG_RESTORE); + dpm_resume_end(PMSG_RESTORE); resume_console(); Close: diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c new file mode 100644 index 0000000..39ac698 --- /dev/null +++ b/kernel/power/hibernate_nvs.c @@ -0,0 +1,135 @@ +/* + * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory + * + * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/mm.h> +#include <linux/suspend.h> + +/* + * Platforms, like ACPI, may want us to save some memory used by them during + * hibernation and to restore the contents of this memory during the subsequent + * resume. The code below implements a mechanism allowing us to do that. + */ + +struct nvs_page { + unsigned long phys_start; + unsigned int size; + void *kaddr; + void *data; + struct list_head node; +}; + +static LIST_HEAD(nvs_list); + +/** + * hibernate_nvs_register - register platform NVS memory region to save + * @start - physical address of the region + * @size - size of the region + * + * The NVS region need not be page-aligned (both ends) and we arrange + * things so that the data from page-aligned addresses in this region will + * be copied into separate RAM pages. + */ +int hibernate_nvs_register(unsigned long start, unsigned long size) +{ + struct nvs_page *entry, *next; + + while (size > 0) { + unsigned int nr_bytes; + + entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); + if (!entry) + goto Error; + + list_add_tail(&entry->node, &nvs_list); + entry->phys_start = start; + nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); + entry->size = (size < nr_bytes) ? size : nr_bytes; + + start += entry->size; + size -= entry->size; + } + return 0; + + Error: + list_for_each_entry_safe(entry, next, &nvs_list, node) { + list_del(&entry->node); + kfree(entry); + } + return -ENOMEM; +} + +/** + * hibernate_nvs_free - free data pages allocated for saving NVS regions + */ +void hibernate_nvs_free(void) +{ + struct nvs_page *entry; + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) { + free_page((unsigned long)entry->data); + entry->data = NULL; + if (entry->kaddr) { + iounmap(entry->kaddr); + entry->kaddr = NULL; + } + } +} + +/** + * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions + */ +int hibernate_nvs_alloc(void) +{ + struct nvs_page *entry; + + list_for_each_entry(entry, &nvs_list, node) { + entry->data = (void *)__get_free_page(GFP_KERNEL); + if (!entry->data) { + hibernate_nvs_free(); + return -ENOMEM; + } + } + return 0; +} + +/** + * hibernate_nvs_save - save NVS memory regions + */ +void hibernate_nvs_save(void) +{ + struct nvs_page *entry; + + printk(KERN_INFO "PM: Saving platform NVS memory\n"); + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) { + entry->kaddr = ioremap(entry->phys_start, entry->size); + memcpy(entry->data, entry->kaddr, entry->size); + } +} + +/** + * hibernate_nvs_restore - restore NVS memory regions + * + * This function is going to be called with interrupts disabled, so it + * cannot iounmap the virtual addresses used to access the NVS region. + */ +void hibernate_nvs_restore(void) +{ + struct nvs_page *entry; + + printk(KERN_INFO "PM: Restoring platform NVS memory\n"); + + list_for_each_entry(entry, &nvs_list, node) + if (entry->data) + memcpy(entry->kaddr, entry->data, entry->size); +} diff --git a/kernel/power/main.c b/kernel/power/main.c index 8680282..f710e36 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -8,20 +8,9 @@ * */ -#include <linux/module.h> -#include <linux/suspend.h> #include <linux/kobject.h> #include <linux/string.h> -#include <linux/delay.h> -#include <linux/errno.h> -#include <linux/kmod.h> -#include <linux/init.h> -#include <linux/console.h> -#include <linux/cpu.h> #include <linux/resume-trace.h> -#include <linux/freezer.h> -#include <linux/vmstat.h> -#include <linux/syscalls.h> #include "power.h" @@ -119,373 +108,6 @@ power_attr(pm_test); #endif /* CONFIG_PM_SLEEP */ -#ifdef CONFIG_SUSPEND - -static int suspend_test(int level) -{ -#ifdef CONFIG_PM_DEBUG - if (pm_test_level == level) { - printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); - mdelay(5000); - return 1; - } -#endif /* !CONFIG_PM_DEBUG */ - return 0; -} - -#ifdef CONFIG_PM_TEST_SUSPEND - -/* - * We test the system suspend code by setting an RTC wakealarm a short - * time in the future, then suspending. Suspending the devices won't - * normally take long ... some systems only need a few milliseconds. - * - * The time it takes is system-specific though, so when we test this - * during system bootup we allow a LOT of time. - */ -#define TEST_SUSPEND_SECONDS 5 - -static unsigned long suspend_test_start_time; - -static void suspend_test_start(void) -{ - /* FIXME Use better timebase than "jiffies", ideally a clocksource. - * What we want is a hardware counter that will work correctly even - * during the irqs-are-off stages of the suspend/resume cycle... - */ - suspend_test_start_time = jiffies; -} - -static void suspend_test_finish(const char *label) -{ - long nj = jiffies - suspend_test_start_time; - unsigned msec; - - msec = jiffies_to_msecs(abs(nj)); - pr_info("PM: %s took %d.%03d seconds\n", label, - msec / 1000, msec % 1000); - - /* Warning on suspend means the RTC alarm period needs to be - * larger -- the system was sooo slooowwww to suspend that the - * alarm (should have) fired before the system went to sleep! - * - * Warning on either suspend or resume also means the system - * has some performance issues. The stack dump of a WARN_ON - * is more likely to get the right attention than a printk... - */ - WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label); -} - -#else - -static void suspend_test_start(void) -{ -} - -static void suspend_test_finish(const char *label) -{ -} - -#endif - -/* This is just an arbitrary number */ -#define FREE_PAGE_NUMBER (100) - -static struct platform_suspend_ops *suspend_ops; - -/** - * suspend_set_ops - Set the global suspend method table. - * @ops: Pointer to ops structure. - */ - -void suspend_set_ops(struct platform_suspend_ops *ops) -{ - mutex_lock(&pm_mutex); - suspend_ops = ops; - mutex_unlock(&pm_mutex); -} - -/** - * suspend_valid_only_mem - generic memory-only valid callback - * - * Platform drivers that implement mem suspend only and only need - * to check for that in their .valid callback can use this instead - * of rolling their own .valid callback. - */ -int suspend_valid_only_mem(suspend_state_t state) -{ - return state == PM_SUSPEND_MEM; -} - -/** - * suspend_prepare - Do prep work before entering low-power state. - * - * This is common code that is called for each state that we're entering. - * Run suspend notifiers, allocate a console and stop all processes. - */ -static int suspend_prepare(void) -{ - int error; - unsigned int free_pages; - - if (!suspend_ops || !suspend_ops->enter) - return -EPERM; - - pm_prepare_console(); - - error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); - if (error) - goto Finish; - - error = usermodehelper_disable(); - if (error) - goto Finish; - - if (suspend_freeze_processes()) { - error = -EAGAIN; - goto Thaw; - } - - free_pages = global_page_state(NR_FREE_PAGES); - if (free_pages < FREE_PAGE_NUMBER) { - pr_debug("PM: free some memory\n"); - shrink_all_memory(FREE_PAGE_NUMBER - free_pages); - if (nr_free_pages() < FREE_PAGE_NUMBER) { - error = -ENOMEM; - printk(KERN_ERR "PM: No enough memory\n"); - } - } - if (!error) - return 0; - - Thaw: - suspend_thaw_processes(); - usermodehelper_enable(); - Finish: - pm_notifier_call_chain(PM_POST_SUSPEND); - pm_restore_console(); - return error; -} - -/* default implementation */ -void __attribute__ ((weak)) arch_suspend_disable_irqs(void) -{ - local_irq_disable(); -} - -/* default implementation */ -void __attribute__ ((weak)) arch_suspend_enable_irqs(void) -{ - local_irq_enable(); -} - -/** - * suspend_enter - enter the desired system sleep state. - * @state: state to enter - * - * This function should be called after devices have been suspended. - */ -static int suspend_enter(suspend_state_t state) -{ - int error; - - if (suspend_ops->prepare) { - error = suspend_ops->prepare(); - if (error) - return error; - } - - error = device_power_down(PMSG_SUSPEND); - if (error) { - printk(KERN_ERR "PM: Some devices failed to power down\n"); - goto Platfrom_finish; - } - - if (suspend_ops->prepare_late) { - error = suspend_ops->prepare_late(); - if (error) - goto Power_up_devices; - } - - if (suspend_test(TEST_PLATFORM)) - goto Platform_wake; - - error = disable_nonboot_cpus(); - if (error || suspend_test(TEST_CPUS)) - goto Enable_cpus; - - arch_suspend_disable_irqs(); - BUG_ON(!irqs_disabled()); - - error = sysdev_suspend(PMSG_SUSPEND); - if (!error) { - if (!suspend_test(TEST_CORE)) - error = suspend_ops->enter(state); - sysdev_resume(); - } - - arch_suspend_enable_irqs(); - BUG_ON(irqs_disabled()); - - Enable_cpus: - enable_nonboot_cpus(); - - Platform_wake: - if (suspend_ops->wake) - suspend_ops->wake(); - - Power_up_devices: - device_power_up(PMSG_RESUME); - - Platfrom_finish: - if (suspend_ops->finish) - suspend_ops->finish(); - - return error; -} - -/** - * suspend_devices_and_enter - suspend devices and enter the desired system - * sleep state. - * @state: state to enter - */ -int suspend_devices_and_enter(suspend_state_t state) -{ - int error; - - if (!suspend_ops) - return -ENOSYS; - - if (suspend_ops->begin) { - error = suspend_ops->begin(state); - if (error) - goto Close; - } - suspend_console(); - suspend_test_start(); - error = device_suspend(PMSG_SUSPEND); - if (error) { - printk(KERN_ERR "PM: Some devices failed to suspend\n"); - goto Recover_platform; - } - suspend_test_finish("suspend devices"); - if (suspend_test(TEST_DEVICES)) - goto Recover_platform; - - suspend_enter(state); - - Resume_devices: - suspend_test_start(); - device_resume(PMSG_RESUME); - suspend_test_finish("resume devices"); - resume_console(); - Close: - if (suspend_ops->end) - suspend_ops->end(); - return error; - - Recover_platform: - if (suspend_ops->recover) - suspend_ops->recover(); - goto Resume_devices; -} - -/** - * suspend_finish - Do final work before exiting suspend sequence. - * - * Call platform code to clean up, restart processes, and free the - * console that we've allocated. This is not called for suspend-to-disk. - */ -static void suspend_finish(void) -{ - suspend_thaw_processes(); - usermodehelper_enable(); - pm_notifier_call_chain(PM_POST_SUSPEND); - pm_restore_console(); -} - - - - -static const char * const pm_states[PM_SUSPEND_MAX] = { - [PM_SUSPEND_STANDBY] = "standby", - [PM_SUSPEND_MEM] = "mem", -}; - -static inline int valid_state(suspend_state_t state) -{ - /* All states need lowlevel support and need to be valid - * to the lowlevel implementation, no valid callback - * implies that none are valid. */ - if (!suspend_ops || !suspend_ops->valid || !suspend_ops->valid(state)) - return 0; - return 1; -} - - -/** - * enter_state - Do common work of entering low-power state. - * @state: pm_state structure for state we're entering. - * - * Make sure we're the only ones trying to enter a sleep state. Fail - * if someone has beat us to it, since we don't want anything weird to - * happen when we wake up. - * Then, do the setup for suspend, enter the state, and cleaup (after - * we've woken up). - */ -static int enter_state(suspend_state_t state) -{ - int error; - - if (!valid_state(state)) - return -ENODEV; - - if (!mutex_trylock(&pm_mutex)) - return -EBUSY; - - printk(KERN_INFO "PM: Syncing filesystems ... "); - sys_sync(); - printk("done.\n"); - - pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); - error = suspend_prepare(); - if (error) - goto Unlock; - - if (suspend_test(TEST_FREEZER)) - goto Finish; - - pr_debug("PM: Entering %s sleep\n", pm_states[state]); - error = suspend_devices_and_enter(state); - - Finish: - pr_debug("PM: Finishing wakeup.\n"); - suspend_finish(); - Unlock: - mutex_unlock(&pm_mutex); - return error; -} - - -/** - * pm_suspend - Externally visible function for suspending system. - * @state: Enumerated value of state to enter. - * - * Determine whether or not value is within range, get state - * structure, and enter (above). - */ - -int pm_suspend(suspend_state_t state) -{ - if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) - return enter_state(state); - return -EINVAL; -} - -EXPORT_SYMBOL(pm_suspend); - -#endif /* CONFIG_SUSPEND */ - struct kobject *power_kobj; /** @@ -498,7 +120,6 @@ struct kobject *power_kobj; * store() accepts one of those strings, translates it into the * proper enumerated value, and initiates a suspend transition. */ - static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -596,7 +217,6 @@ static struct attribute_group attr_group = { .attrs = g, }; - static int __init pm_init(void) { power_kobj = kobject_create_and_add("power", NULL); @@ -606,144 +226,3 @@ static int __init pm_init(void) } core_initcall(pm_init); - - -#ifdef CONFIG_PM_TEST_SUSPEND - -#include <linux/rtc.h> - -/* - * To test system suspend, we need a hands-off mechanism to resume the - * system. RTCs wake alarms are a common self-contained mechanism. - */ - -static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) -{ - static char err_readtime[] __initdata = - KERN_ERR "PM: can't read %s time, err %d\n"; - static char err_wakealarm [] __initdata = - KERN_ERR "PM: can't set %s wakealarm, err %d\n"; - static char err_suspend[] __initdata = - KERN_ERR "PM: suspend test failed, error %d\n"; - static char info_test[] __initdata = - KERN_INFO "PM: test RTC wakeup from '%s' suspend\n"; - - unsigned long now; - struct rtc_wkalrm alm; - int status; - - /* this may fail if the RTC hasn't been initialized */ - status = rtc_read_time(rtc, &alm.time); - if (status < 0) { - printk(err_readtime, dev_name(&rtc->dev), status); - return; - } - rtc_tm_to_time(&alm.time, &now); - - memset(&alm, 0, sizeof alm); - rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time); - alm.enabled = true; - - status = rtc_set_alarm(rtc, &alm); - if (status < 0) { - printk(err_wakealarm, dev_name(&rtc->dev), status); - return; - } - - if (state == PM_SUSPEND_MEM) { - printk(info_test, pm_states[state]); - status = pm_suspend(state); - if (status == -ENODEV) - state = PM_SUSPEND_STANDBY; - } - if (state == PM_SUSPEND_STANDBY) { - printk(info_test, pm_states[state]); - status = pm_suspend(state); - } - if (status < 0) - printk(err_suspend, status); - - /* Some platforms can't detect that the alarm triggered the - * wakeup, or (accordingly) disable it after it afterwards. - * It's supposed to give oneshot behavior; cope. - */ - alm.enabled = false; - rtc_set_alarm(rtc, &alm); -} - -static int __init has_wakealarm(struct device *dev, void *name_ptr) -{ - struct rtc_device *candidate = to_rtc_device(dev); - - if (!candidate->ops->set_alarm) - return 0; - if (!device_may_wakeup(candidate->dev.parent)) - return 0; - - *(const char **)name_ptr = dev_name(dev); - return 1; -} - -/* - * Kernel options like "test_suspend=mem" force suspend/resume sanity tests - * at startup time. They're normally disabled, for faster boot and because - * we can't know which states really work on this particular system. - */ -static suspend_state_t test_state __initdata = PM_SUSPEND_ON; - -static char warn_bad_state[] __initdata = - KERN_WARNING "PM: can't test '%s' suspend state\n"; - -static int __init setup_test_suspend(char *value) -{ - unsigned i; - - /* "=mem" ==> "mem" */ - value++; - for (i = 0; i < PM_SUSPEND_MAX; i++) { - if (!pm_states[i]) - continue; - if (strcmp(pm_states[i], value) != 0) - continue; - test_state = (__force suspend_state_t) i; - return 0; - } - printk(warn_bad_state, value); - return 0; -} -__setup("test_suspend", setup_test_suspend); - -static int __init test_suspend(void) -{ - static char warn_no_rtc[] __initdata = - KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; - - char *pony = NULL; - struct rtc_device *rtc = NULL; - - /* PM is initialized by now; is that state testable? */ - if (test_state == PM_SUSPEND_ON) - goto done; - if (!valid_state(test_state)) { - printk(warn_bad_state, pm_states[test_state]); - goto done; - } - - /* RTCs have initialized by now too ... can we use one? */ - class_find_device(rtc_class, NULL, &pony, has_wakealarm); - if (pony) - rtc = rtc_class_open(pony); - if (!rtc) { - printk(warn_no_rtc); - goto done; - } - - /* go for it */ - test_wakealarm(rtc, test_state); - rtc_class_close(rtc); -done: - return 0; -} -late_initcall(test_suspend); - -#endif /* CONFIG_PM_TEST_SUSPEND */ diff --git a/kernel/power/power.h b/kernel/power/power.h index 46b5ec7..26d5a26 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -45,7 +45,7 @@ static inline char *check_image_kernel(struct swsusp_info *info) */ #define SPARE_PAGES ((1024 * 1024) >> PAGE_SHIFT) -/* kernel/power/disk.c */ +/* kernel/power/hibernate.c */ extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); extern int hibernation_platform_enter(void); @@ -74,7 +74,7 @@ extern asmlinkage int swsusp_arch_resume(void); extern int create_basic_memory_bitmaps(void); extern void free_basic_memory_bitmaps(void); -extern unsigned int count_data_pages(void); +extern int swsusp_shrink_memory(void); /** * Auxiliary structure used for reading the snapshot image data and @@ -147,9 +147,8 @@ extern int swsusp_swap_in_use(void); */ #define SF_PLATFORM_MODE 1 -/* kernel/power/disk.c */ +/* kernel/power/hibernate.c */ extern int swsusp_check(void); -extern int swsusp_shrink_memory(void); extern void swsusp_free(void); extern int swsusp_read(unsigned int *flags_p); extern int swsusp_write(unsigned int flags); @@ -161,22 +160,36 @@ extern void swsusp_show_speed(struct timeval *, struct timeval *, unsigned int, char *); #ifdef CONFIG_SUSPEND -/* kernel/power/main.c */ +/* kernel/power/suspend.c */ +extern const char *const pm_states[]; + +extern bool valid_state(suspend_state_t state); extern int suspend_devices_and_enter(suspend_state_t state); +extern int enter_state(suspend_state_t state); #else /* !CONFIG_SUSPEND */ static inline int suspend_devices_and_enter(suspend_state_t state) { return -ENOSYS; } +static inline int enter_state(suspend_state_t state) { return -ENOSYS; } +static inline bool valid_state(suspend_state_t state) { return false; } #endif /* !CONFIG_SUSPEND */ +#ifdef CONFIG_PM_TEST_SUSPEND +/* kernel/power/suspend_test.c */ +extern void suspend_test_start(void); +extern void suspend_test_finish(const char *label); +#else /* !CONFIG_PM_TEST_SUSPEND */ +static inline void suspend_test_start(void) {} +static inline void suspend_test_finish(const char *label) {} +#endif /* !CONFIG_PM_TEST_SUSPEND */ + #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ extern int pm_notifier_call_chain(unsigned long val); #endif #ifdef CONFIG_HIGHMEM -unsigned int count_highmem_pages(void); int restore_highmem(void); #else static inline unsigned int count_highmem_pages(void) { return 0; } diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 33e2e4a..523a451 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -39,6 +39,14 @@ static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); +/* + * Preferred image size in bytes (tunable via /sys/power/image_size). + * When it is set to N, swsusp will do its best to ensure the image + * size will not exceed N bytes, but if that is impossible, it will + * try to create the smallest image possible. + */ +unsigned long image_size = 500 * 1024 * 1024; + /* List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been * allocated by the "resume" kernel, so their contents cannot be written @@ -840,7 +848,7 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) * pages. */ -unsigned int count_highmem_pages(void) +static unsigned int count_highmem_pages(void) { struct zone *zone; unsigned int n = 0; @@ -902,7 +910,7 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) * pages. */ -unsigned int count_data_pages(void) +static unsigned int count_data_pages(void) { struct zone *zone; unsigned long pfn, max_zone_pfn; @@ -1058,6 +1066,74 @@ void swsusp_free(void) buffer = NULL; } +/** + * swsusp_shrink_memory - Try to free as much memory as needed + * + * ... but do not OOM-kill anyone + * + * Notice: all userland should be stopped before it is called, or + * livelock is possible. + */ + +#define SHRINK_BITE 10000 +static inline unsigned long __shrink_memory(long tmp) +{ + if (tmp > SHRINK_BITE) + tmp = SHRINK_BITE; + return shrink_all_memory(tmp); +} + +int swsusp_shrink_memory(void) +{ + long tmp; + struct zone *zone; + unsigned long pages = 0; + unsigned int i = 0; + char *p = "-\\|/"; + struct timeval start, stop; + + printk(KERN_INFO "PM: Shrinking memory... "); + do_gettimeofday(&start); + do { + long size, highmem_size; + + highmem_size = count_highmem_pages(); + size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; + tmp = size; + size += highmem_size; + for_each_populated_zone(zone) { + tmp += snapshot_additional_pages(zone); + if (is_highmem(zone)) { + highmem_size -= + zone_page_state(zone, NR_FREE_PAGES); + } else { + tmp -= zone_page_state(zone, NR_FREE_PAGES); + tmp += zone->lowmem_reserve[ZONE_NORMAL]; + } + } + + if (highmem_size < 0) + highmem_size = 0; + + tmp += highmem_size; + if (tmp > 0) { + tmp = __shrink_memory(tmp); + if (!tmp) + return -ENOMEM; + pages += tmp; + } else if (size > image_size / PAGE_SIZE) { + tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); + pages += tmp; + } + printk("\b%c", p[i++%4]); + } while (tmp > 0); + do_gettimeofday(&stop); + printk("\bdone (%lu pages freed)\n", pages); + swsusp_show_speed(&start, &stop, pages, "Freed"); + + return 0; +} + #ifdef CONFIG_HIGHMEM /** * count_pages_for_highmem - compute the number of non-highmem pages diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c new file mode 100644 index 0000000..6f10dfc --- /dev/null +++ b/kernel/power/suspend.c @@ -0,0 +1,300 @@ +/* + * kernel/power/suspend.c - Suspend to RAM and standby functionality. + * + * Copyright (c) 2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/console.h> +#include <linux/cpu.h> +#include <linux/syscalls.h> + +#include "power.h" + +const char *const pm_states[PM_SUSPEND_MAX] = { + [PM_SUSPEND_STANDBY] = "standby", + [PM_SUSPEND_MEM] = "mem", +}; + +static struct platform_suspend_ops *suspend_ops; + +/** + * suspend_set_ops - Set the global suspend method table. + * @ops: Pointer to ops structure. + */ +void suspend_set_ops(struct platform_suspend_ops *ops) +{ + mutex_lock(&pm_mutex); + suspend_ops = ops; + mutex_unlock(&pm_mutex); +} + +bool valid_state(suspend_state_t state) +{ + /* + * All states need lowlevel support and need to be valid to the lowlevel + * implementation, no valid callback implies that none are valid. + */ + return suspend_ops && suspend_ops->valid && suspend_ops->valid(state); +} + +/** + * suspend_valid_only_mem - generic memory-only valid callback + * + * Platform drivers that implement mem suspend only and only need + * to check for that in their .valid callback can use this instead + * of rolling their own .valid callback. + */ +int suspend_valid_only_mem(suspend_state_t state) +{ + return state == PM_SUSPEND_MEM; +} + +static int suspend_test(int level) +{ +#ifdef CONFIG_PM_DEBUG + if (pm_test_level == level) { + printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); + mdelay(5000); + return 1; + } +#endif /* !CONFIG_PM_DEBUG */ + return 0; +} + +/** + * suspend_prepare - Do prep work before entering low-power state. + * + * This is common code that is called for each state that we're entering. + * Run suspend notifiers, allocate a console and stop all processes. + */ +static int suspend_prepare(void) +{ + int error; + + if (!suspend_ops || !suspend_ops->enter) + return -EPERM; + + pm_prepare_console(); + + error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); + if (error) + goto Finish; + + error = usermodehelper_disable(); + if (error) + goto Finish; + + error = suspend_freeze_processes(); + if (!error) + return 0; + + suspend_thaw_processes(); + usermodehelper_enable(); + Finish: + pm_notifier_call_chain(PM_POST_SUSPEND); + pm_restore_console(); + return error; +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_disable_irqs(void) +{ + local_irq_disable(); +} + +/* default implementation */ +void __attribute__ ((weak)) arch_suspend_enable_irqs(void) +{ + local_irq_enable(); +} + +/** + * suspend_enter - enter the desired system sleep state. + * @state: state to enter + * + * This function should be called after devices have been suspended. + */ +static int suspend_enter(suspend_state_t state) +{ + int error; + + if (suspend_ops->prepare) { + error = suspend_ops->prepare(); + if (error) + return error; + } + + error = dpm_suspend_noirq(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down\n"); + goto Platfrom_finish; + } + + if (suspend_ops->prepare_late) { + error = suspend_ops->prepare_late(); + if (error) + goto Power_up_devices; + } + + if (suspend_test(TEST_PLATFORM)) + goto Platform_wake; + + error = disable_nonboot_cpus(); + if (error || suspend_test(TEST_CPUS)) + goto Enable_cpus; + + arch_suspend_disable_irqs(); + BUG_ON(!irqs_disabled()); + + error = sysdev_suspend(PMSG_SUSPEND); + if (!error) { + if (!suspend_test(TEST_CORE)) + error = suspend_ops->enter(state); + sysdev_resume(); + } + + arch_suspend_enable_irqs(); + BUG_ON(irqs_disabled()); + + Enable_cpus: + enable_nonboot_cpus(); + + Platform_wake: + if (suspend_ops->wake) + suspend_ops->wake(); + + Power_up_devices: + dpm_resume_noirq(PMSG_RESUME); + + Platfrom_finish: + if (suspend_ops->finish) + suspend_ops->finish(); + + return error; +} + +/** + * suspend_devices_and_enter - suspend devices and enter the desired system + * sleep state. + * @state: state to enter + */ +int suspend_devices_and_enter(suspend_state_t state) +{ + int error; + + if (!suspend_ops) + return -ENOSYS; + + if (suspend_ops->begin) { + error = suspend_ops->begin(state); + if (error) + goto Close; + } + suspend_console(); + suspend_test_start(); + error = dpm_suspend_start(PMSG_SUSPEND); + if (error) { + printk(KERN_ERR "PM: Some devices failed to suspend\n"); + goto Recover_platform; + } + suspend_test_finish("suspend devices"); + if (suspend_test(TEST_DEVICES)) + goto Recover_platform; + + suspend_enter(state); + + Resume_devices: + suspend_test_start(); + dpm_resume_end(PMSG_RESUME); + suspend_test_finish("resume devices"); + resume_console(); + Close: + if (suspend_ops->end) + suspend_ops->end(); + return error; + + Recover_platform: + if (suspend_ops->recover) + suspend_ops->recover(); + goto Resume_devices; +} + +/** + * suspend_finish - Do final work before exiting suspend sequence. + * + * Call platform code to clean up, restart processes, and free the + * console that we've allocated. This is not called for suspend-to-disk. + */ +static void suspend_finish(void) +{ + suspend_thaw_processes(); + usermodehelper_enable(); + pm_notifier_call_chain(PM_POST_SUSPEND); + pm_restore_console(); +} + +/** + * enter_state - Do common work of entering low-power state. + * @state: pm_state structure for state we're entering. + * + * Make sure we're the only ones trying to enter a sleep state. Fail + * if someone has beat us to it, since we don't want anything weird to + * happen when we wake up. + * Then, do the setup for suspend, enter the state, and cleaup (after + * we've woken up). + */ +int enter_state(suspend_state_t state) +{ + int error; + + if (!valid_state(state)) + return -ENODEV; + + if (!mutex_trylock(&pm_mutex)) + return -EBUSY; + + printk(KERN_INFO "PM: Syncing filesystems ... "); + sys_sync(); + printk("done.\n"); + + pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); + error = suspend_prepare(); + if (error) + goto Unlock; + + if (suspend_test(TEST_FREEZER)) + goto Finish; + + pr_debug("PM: Entering %s sleep\n", pm_states[state]); + error = suspend_devices_and_enter(state); + + Finish: + pr_debug("PM: Finishing wakeup.\n"); + suspend_finish(); + Unlock: + mutex_unlock(&pm_mutex); + return error; +} + +/** + * pm_suspend - Externally visible function for suspending system. + * @state: Enumerated value of state to enter. + * + * Determine whether or not value is within range, get state + * structure, and enter (above). + */ +int pm_suspend(suspend_state_t state) +{ + if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) + return enter_state(state); + return -EINVAL; +} +EXPORT_SYMBOL(pm_suspend); diff --git a/kernel/power/suspend_test.c b/kernel/power/suspend_test.c new file mode 100644 index 0000000..17d8bb1 --- /dev/null +++ b/kernel/power/suspend_test.c @@ -0,0 +1,187 @@ +/* + * kernel/power/suspend_test.c - Suspend to RAM and standby test facility. + * + * Copyright (c) 2009 Pavel Machek <pavel@ucw.cz> + * + * This file is released under the GPLv2. + */ + +#include <linux/init.h> +#include <linux/rtc.h> + +#include "power.h" + +/* + * We test the system suspend code by setting an RTC wakealarm a short + * time in the future, then suspending. Suspending the devices won't + * normally take long ... some systems only need a few milliseconds. + * + * The time it takes is system-specific though, so when we test this + * during system bootup we allow a LOT of time. + */ +#define TEST_SUSPEND_SECONDS 5 + +static unsigned long suspend_test_start_time; + +void suspend_test_start(void) +{ + /* FIXME Use better timebase than "jiffies", ideally a clocksource. + * What we want is a hardware counter that will work correctly even + * during the irqs-are-off stages of the suspend/resume cycle... + */ + suspend_test_start_time = jiffies; +} + +void suspend_test_finish(const char *label) +{ + long nj = jiffies - suspend_test_start_time; + unsigned msec; + + msec = jiffies_to_msecs(abs(nj)); + pr_info("PM: %s took %d.%03d seconds\n", label, + msec / 1000, msec % 1000); + + /* Warning on suspend means the RTC alarm period needs to be + * larger -- the system was sooo slooowwww to suspend that the + * alarm (should have) fired before the system went to sleep! + * + * Warning on either suspend or resume also means the system + * has some performance issues. The stack dump of a WARN_ON + * is more likely to get the right attention than a printk... + */ + WARN(msec > (TEST_SUSPEND_SECONDS * 1000), "Component: %s\n", label); +} + +/* + * To test system suspend, we need a hands-off mechanism to resume the + * system. RTCs wake alarms are a common self-contained mechanism. + */ + +static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) +{ + static char err_readtime[] __initdata = + KERN_ERR "PM: can't read %s time, err %d\n"; + static char err_wakealarm [] __initdata = + KERN_ERR "PM: can't set %s wakealarm, err %d\n"; + static char err_suspend[] __initdata = + KERN_ERR "PM: suspend test failed, error %d\n"; + static char info_test[] __initdata = + KERN_INFO "PM: test RTC wakeup from '%s' suspend\n"; + + unsigned long now; + struct rtc_wkalrm alm; + int status; + + /* this may fail if the RTC hasn't been initialized */ + status = rtc_read_time(rtc, &alm.time); + if (status < 0) { + printk(err_readtime, dev_name(&rtc->dev), status); + return; + } + rtc_tm_to_time(&alm.time, &now); + + memset(&alm, 0, sizeof alm); + rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time); + alm.enabled = true; + + status = rtc_set_alarm(rtc, &alm); + if (status < 0) { + printk(err_wakealarm, dev_name(&rtc->dev), status); + return; + } + + if (state == PM_SUSPEND_MEM) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + if (status == -ENODEV) + state = PM_SUSPEND_STANDBY; + } + if (state == PM_SUSPEND_STANDBY) { + printk(info_test, pm_states[state]); + status = pm_suspend(state); + } + if (status < 0) + printk(err_suspend, status); + + /* Some platforms can't detect that the alarm triggered the + * wakeup, or (accordingly) disable it after it afterwards. + * It's supposed to give oneshot behavior; cope. + */ + alm.enabled = false; + rtc_set_alarm(rtc, &alm); +} + +static int __init has_wakealarm(struct device *dev, void *name_ptr) +{ + struct rtc_device *candidate = to_rtc_device(dev); + + if (!candidate->ops->set_alarm) + return 0; + if (!device_may_wakeup(candidate->dev.parent)) + return 0; + + *(const char **)name_ptr = dev_name(dev); + return 1; +} + +/* + * Kernel options like "test_suspend=mem" force suspend/resume sanity tests + * at startup time. They're normally disabled, for faster boot and because + * we can't know which states really work on this particular system. + */ +static suspend_state_t test_state __initdata = PM_SUSPEND_ON; + +static char warn_bad_state[] __initdata = + KERN_WARNING "PM: can't test '%s' suspend state\n"; + +static int __init setup_test_suspend(char *value) +{ + unsigned i; + + /* "=mem" ==> "mem" */ + value++; + for (i = 0; i < PM_SUSPEND_MAX; i++) { + if (!pm_states[i]) + continue; + if (strcmp(pm_states[i], value) != 0) + continue; + test_state = (__force suspend_state_t) i; + return 0; + } + printk(warn_bad_state, value); + return 0; +} +__setup("test_suspend", setup_test_suspend); + +static int __init test_suspend(void) +{ + static char warn_no_rtc[] __initdata = + KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; + + char *pony = NULL; + struct rtc_device *rtc = NULL; + + /* PM is initialized by now; is that state testable? */ + if (test_state == PM_SUSPEND_ON) + goto done; + if (!valid_state(test_state)) { + printk(warn_bad_state, pm_states[test_state]); + goto done; + } + + /* RTCs have initialized by now too ... can we use one? */ + class_find_device(rtc_class, NULL, &pony, has_wakealarm); + if (pony) + rtc = rtc_class_open(pony); + if (!rtc) { + printk(warn_no_rtc); + goto done; + } + + /* go for it */ + test_wakealarm(rtc, test_state); + rtc_class_close(rtc); +done: + return 0; +} +late_initcall(test_suspend); diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 78c3504..6a07f4d 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -55,14 +55,6 @@ #include "power.h" -/* - * Preferred image size in bytes (tunable via /sys/power/image_size). - * When it is set to N, swsusp will do its best to ensure the image - * size will not exceed N bytes, but if that is impossible, it will - * try to create the smallest image possible. - */ -unsigned long image_size = 500 * 1024 * 1024; - int in_suspend __nosavedata = 0; /** @@ -194,193 +186,3 @@ void swsusp_show_speed(struct timeval *start, struct timeval *stop, centisecs / 100, centisecs % 100, kps / 1000, (kps % 1000) / 10); } - -/** - * swsusp_shrink_memory - Try to free as much memory as needed - * - * ... but do not OOM-kill anyone - * - * Notice: all userland should be stopped before it is called, or - * livelock is possible. - */ - -#define SHRINK_BITE 10000 -static inline unsigned long __shrink_memory(long tmp) -{ - if (tmp > SHRINK_BITE) - tmp = SHRINK_BITE; - return shrink_all_memory(tmp); -} - -int swsusp_shrink_memory(void) -{ - long tmp; - struct zone *zone; - unsigned long pages = 0; - unsigned int i = 0; - char *p = "-\\|/"; - struct timeval start, stop; - - printk(KERN_INFO "PM: Shrinking memory... "); - do_gettimeofday(&start); - do { - long size, highmem_size; - - highmem_size = count_highmem_pages(); - size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; - tmp = size; - size += highmem_size; - for_each_populated_zone(zone) { - tmp += snapshot_additional_pages(zone); - if (is_highmem(zone)) { - highmem_size -= - zone_page_state(zone, NR_FREE_PAGES); - } else { - tmp -= zone_page_state(zone, NR_FREE_PAGES); - tmp += zone->lowmem_reserve[ZONE_NORMAL]; - } - } - - if (highmem_size < 0) - highmem_size = 0; - - tmp += highmem_size; - if (tmp > 0) { - tmp = __shrink_memory(tmp); - if (!tmp) - return -ENOMEM; - pages += tmp; - } else if (size > image_size / PAGE_SIZE) { - tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); - pages += tmp; - } - printk("\b%c", p[i++%4]); - } while (tmp > 0); - do_gettimeofday(&stop); - printk("\bdone (%lu pages freed)\n", pages); - swsusp_show_speed(&start, &stop, pages, "Freed"); - - return 0; -} - -/* - * Platforms, like ACPI, may want us to save some memory used by them during - * hibernation and to restore the contents of this memory during the subsequent - * resume. The code below implements a mechanism allowing us to do that. - */ - -struct nvs_page { - unsigned long phys_start; - unsigned int size; - void *kaddr; - void *data; - struct list_head node; -}; - -static LIST_HEAD(nvs_list); - -/** - * hibernate_nvs_register - register platform NVS memory region to save - * @start - physical address of the region - * @size - size of the region - * - * The NVS region need not be page-aligned (both ends) and we arrange - * things so that the data from page-aligned addresses in this region will - * be copied into separate RAM pages. - */ -int hibernate_nvs_register(unsigned long start, unsigned long size) -{ - struct nvs_page *entry, *next; - - while (size > 0) { - unsigned int nr_bytes; - - entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); - if (!entry) - goto Error; - - list_add_tail(&entry->node, &nvs_list); - entry->phys_start = start; - nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); - entry->size = (size < nr_bytes) ? size : nr_bytes; - - start += entry->size; - size -= entry->size; - } - return 0; - - Error: - list_for_each_entry_safe(entry, next, &nvs_list, node) { - list_del(&entry->node); - kfree(entry); - } - return -ENOMEM; -} - -/** - * hibernate_nvs_free - free data pages allocated for saving NVS regions - */ -void hibernate_nvs_free(void) -{ - struct nvs_page *entry; - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) { - free_page((unsigned long)entry->data); - entry->data = NULL; - if (entry->kaddr) { - iounmap(entry->kaddr); - entry->kaddr = NULL; - } - } -} - -/** - * hibernate_nvs_alloc - allocate memory necessary for saving NVS regions - */ -int hibernate_nvs_alloc(void) -{ - struct nvs_page *entry; - - list_for_each_entry(entry, &nvs_list, node) { - entry->data = (void *)__get_free_page(GFP_KERNEL); - if (!entry->data) { - hibernate_nvs_free(); - return -ENOMEM; - } - } - return 0; -} - -/** - * hibernate_nvs_save - save NVS memory regions - */ -void hibernate_nvs_save(void) -{ - struct nvs_page *entry; - - printk(KERN_INFO "PM: Saving platform NVS memory\n"); - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) { - entry->kaddr = ioremap(entry->phys_start, entry->size); - memcpy(entry->data, entry->kaddr, entry->size); - } -} - -/** - * hibernate_nvs_restore - restore NVS memory regions - * - * This function is going to be called with interrupts disabled, so it - * cannot iounmap the virtual addresses used to access the NVS region. - */ -void hibernate_nvs_restore(void) -{ - struct nvs_page *entry; - - printk(KERN_INFO "PM: Restoring platform NVS memory\n"); - - list_for_each_entry(entry, &nvs_list, node) - if (entry->data) - memcpy(entry->kaddr, entry->data, entry->size); -} diff --git a/mm/vmscan.c b/mm/vmscan.c index d254306..95c08a8 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2056,7 +2056,7 @@ unsigned long global_lru_pages(void) + global_page_state(NR_INACTIVE_FILE); } -#ifdef CONFIG_PM +#ifdef CONFIG_HIBERNATION /* * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages * from LRU lists system-wide, for given pass and priority. @@ -2196,7 +2196,7 @@ out: return sc.nr_reclaimed; } -#endif +#endif /* CONFIG_HIBERNATION */ /* It's optimal to keep kswapds on the same CPUs as their memory, but not required for correctness. So if the last cpu in a node goes |