From 1e817fb62cd185a2232ad4302579491805609489 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 19 Nov 2012 10:26:16 -0800 Subject: time: create __getnstimeofday for WARNless calls The pstore RAM backend can get called during resume, and must be defensive against a suspended time source. Expose getnstimeofday logic that returns an error instead of a WARN. This can be detected and the timestamp can be zeroed out. Reported-by: Doug Anderson Cc: John Stultz Cc: Anton Vorontsov Signed-off-by: Kees Cook Signed-off-by: John Stultz diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1a4f6da..dacfe78 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -168,12 +168,16 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) { char *hdr; - struct timeval timestamp; + struct timespec timestamp; size_t len; - do_gettimeofday(&timestamp); + /* Report zeroed timestamp if called before timekeeping has resumed. */ + if (__getnstimeofday(&timestamp)) { + timestamp.tv_sec = 0; + timestamp.tv_nsec = 0; + } hdr = kasprintf(GFP_ATOMIC, RAMOOPS_KERNMSG_HDR "%lu.%lu\n", - (long)timestamp.tv_sec, (long)timestamp.tv_usec); + (long)timestamp.tv_sec, (long)(timestamp.tv_nsec / 1000)); WARN_ON_ONCE(!hdr); len = hdr ? 
strlen(hdr) : 0; persistent_ram_write(prz, hdr, len); diff --git a/include/linux/time.h b/include/linux/time.h index 4d358e9..0015aea 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -158,6 +158,7 @@ extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); extern unsigned int alarm_setitimer(unsigned int seconds); extern int do_getitimer(int which, struct itimerval *value); +extern int __getnstimeofday(struct timespec *tv); extern void getnstimeofday(struct timespec *tv); extern void getrawmonotonic(struct timespec *ts); extern void getnstime_raw_and_real(struct timespec *ts_raw, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4c7de02..dfc7f87 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -214,19 +214,18 @@ static void timekeeping_forward_now(struct timekeeper *tk) } /** - * getnstimeofday - Returns the time of day in a timespec + * __getnstimeofday - Returns the time of day in a timespec. * @ts: pointer to the timespec to be set * - * Returns the time of day in a timespec. + * Updates the time of day in the timespec. + * Returns 0 on success, or -ve when suspended (timespec will be undefined). */ -void getnstimeofday(struct timespec *ts) +int __getnstimeofday(struct timespec *ts) { struct timekeeper *tk = &timekeeper; unsigned long seq; s64 nsecs = 0; - WARN_ON(timekeeping_suspended); - do { seq = read_seqbegin(&tk->lock); @@ -237,6 +236,26 @@ void getnstimeofday(struct timespec *ts) ts->tv_nsec = 0; timespec_add_ns(ts, nsecs); + + /* + * Do not bail out early, in case there were callers still using + * the value, even in the face of the WARN_ON. + */ + if (unlikely(timekeeping_suspended)) + return -EAGAIN; + return 0; +} +EXPORT_SYMBOL(__getnstimeofday); + +/** + * getnstimeofday - Returns the time of day in a timespec. + * @ts: pointer to the timespec to be set + * + * Returns the time of day in a timespec (WARN if suspended). 
+ */ +void getnstimeofday(struct timespec *ts) +{ + WARN_ON(__getnstimeofday(ts)); } EXPORT_SYMBOL(getnstimeofday); -- cgit v0.10.2 From 503637375269e33f368fd3484a199beace01f36e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 13 Dec 2012 13:08:47 -0500 Subject: MAINTAINERS: Update John Stultz's email Update my email address. Signed-off-by: John Stultz diff --git a/MAINTAINERS b/MAINTAINERS index 59203e7..b908912 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6282,7 +6282,7 @@ F: drivers/dma/dw_dmac_regs.h F: drivers/dma/dw_dmac.c TIMEKEEPING, NTP -M: John Stultz +M: John Stultz M: Thomas Gleixner T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/core S: Supported -- cgit v0.10.2 From 023f333a99cee9b5cd3268ff87298eb01a31f78e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 17 Dec 2012 14:30:53 -0700 Subject: NTP: Add a CONFIG_RTC_SYSTOHC configuration The purpose of this option is to allow ARM/etc systems that rely on the class RTC subsystem to have the same kind of automatic NTP based synchronization that we have on PC platforms. Today ARM does not implement update_persistent_clock and makes extensive use of the class RTC system. When enabled, CONFIG_RTC_SYSTOHC will provide a generic rtc_set_ntp_time that stores the current time in the RTC and is intended to complement the existing CONFIG_RTC_HCTOSYS option that loads the RTC at boot. Like with RTC_HCTOSYS the platform's update_persistent_clock is used first, if it works. Platforms with mixed class RTC and non-RTC drivers need to return ENODEV when class RTC should be used. Such an update for PPC is included in this patch. Long term, implementations of update_persistent_clock should migrate to proper class RTC drivers and use CONFIG_RTC_SYSTOHC instead. 
Tested on ARM kirkwood and PPC405 Signed-off-by: Jason Gunthorpe Signed-off-by: John Stultz diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index ce4cb77..bc844a8 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -667,7 +667,7 @@ int update_persistent_clock(struct timespec now) struct rtc_time tm; if (!ppc_md.set_rtc_time) - return 0; + return -ENODEV; to_tm(now.tv_sec + 1 + timezone_offset, &tm); tm.tm_year -= 1900; diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 19c03ab..b377e96 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -25,9 +25,17 @@ config RTC_HCTOSYS the value read from a specified RTC device. This is useful to avoid unnecessary fsck runs at boot time, and to network better. +config RTC_SYSTOHC + bool "Set the RTC time based on NTP synchronization" + default y + help + If you say yes here, the system time (wall clock) will be stored + in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11 + minutes if userspace reports synchronized NTP status. 
+ config RTC_HCTOSYS_DEVICE string "RTC used to set the system time" - depends on RTC_HCTOSYS = y + depends on RTC_HCTOSYS = y || RTC_SYSTOHC = y default "rtc0" help The RTC device that will be used to (re)initialize the system diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 56297f0..69d11f1 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -6,6 +6,7 @@ ccflags-$(CONFIG_RTC_DEBUG) := -DDEBUG obj-$(CONFIG_RTC_LIB) += rtc-lib.o obj-$(CONFIG_RTC_HCTOSYS) += hctosys.o +obj-$(CONFIG_RTC_SYSTOHC) += systohc.o obj-$(CONFIG_RTC_CLASS) += rtc-core.o rtc-core-y := class.o interface.o diff --git a/drivers/rtc/systohc.c b/drivers/rtc/systohc.c new file mode 100644 index 0000000..bf3e242 --- /dev/null +++ b/drivers/rtc/systohc.c @@ -0,0 +1,44 @@ +/* + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + */ +#include +#include + +/** + * rtc_set_ntp_time - Save NTP synchronized time to the RTC + * @now: Current time of day + * + * Replacement for the NTP platform function update_persistent_clock + * that stores time for later retrieval by rtc_hctosys. + * + * Returns 0 on successful RTC update, -ENODEV if a RTC update is not + * possible at all, and various other -errno for specific temporary failure + * cases. + * + * If temporary failure is indicated the caller should try again 'soon' + */ +int rtc_set_ntp_time(struct timespec now) +{ + struct rtc_device *rtc; + struct rtc_time tm; + int err = -ENODEV; + + if (now.tv_nsec < (NSEC_PER_SEC >> 1)) + rtc_time_to_tm(now.tv_sec, &tm); + else + rtc_time_to_tm(now.tv_sec + 1, &tm); + + rtc = rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE); + if (rtc) { + /* rtc_hctosys exclusively uses UTC, so we call set_time here, + * not set_mmss. 
*/ + if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss)) + err = rtc_set_time(rtc, &tm); + rtc_class_close(rtc); + } + + return err; +} diff --git a/include/linux/rtc.h b/include/linux/rtc.h index 9531845c..11d05f9 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -138,6 +138,7 @@ extern void rtc_device_unregister(struct rtc_device *rtc); extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs); +extern int rtc_set_ntp_time(struct timespec now); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 24174b4..313b161 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "tick-internal.h" @@ -483,8 +484,7 @@ out: return leap; } -#ifdef CONFIG_GENERIC_CMOS_UPDATE - +#if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC) static void sync_cmos_clock(struct work_struct *work); static DECLARE_DELAYED_WORK(sync_cmos_work, sync_cmos_clock); @@ -510,14 +510,22 @@ static void sync_cmos_clock(struct work_struct *work) } getnstimeofday(&now); - if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) + if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) { + fail = -ENODEV; +#ifdef CONFIG_GENERIC_CMOS_UPDATE fail = update_persistent_clock(now); +#endif +#ifdef CONFIG_RTC_SYSTOHC + if (fail == -ENODEV) + fail = rtc_set_ntp_time(now); +#endif + } next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2); if (next.tv_nsec <= 0) next.tv_nsec += NSEC_PER_SEC; - if (!fail) + if (!fail || fail == -ENODEV) next.tv_sec = 659; else next.tv_sec = 0; -- cgit v0.10.2 From 2353b47bffe4e6ab39042f470c55d41bb3ff3846 Mon Sep 17 00:00:00 2001 From: Bernd Faust Date: Wed, 5 Dec 2012 
15:16:49 +0100 Subject: Round the calculated scale factor in set_cyc2ns_scale() During some experiments with an external clock (in a FPGA), we saw that the TSC clock drifted approx. 2.5ms per second. This drift was caused by the current way of calculating the scale. In our case cpu_khz had a value of 3292725. This resulted in a scale value of 310. But when doing the calculation by hand it shows that the actual value is 310.9886188491, so a value of 311 would be more precise. With this change the value is rounded. Signed-off-by: Bernd Faust Signed-off-by: John Stultz diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cfa5d4f..8ed0857 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -617,7 +617,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) ns_now = __cycles_2_ns(tsc_now); if (cpu_khz) { - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + + cpu_khz / 2) / cpu_khz; *offset = ns_now - mult_frac(tsc_now, *scale, (1UL << CYC2NS_SCALE_FACTOR)); } -- cgit v0.10.2 From f0dbe81f0e7c39783ad25d9084bbcda131508993 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Fri, 11 Jan 2013 11:58:58 +0100 Subject: posix-timers: Fix clock_adjtime to always return timex data on success The clock_adj call returns the clock state on success, which may be a non-zero value (e.g. TIME_INS), but the modified timex data is copied back to the user only when zero value (TIME_OK) was returned. Fix the condition to copy the data also with positive return values. 
Signed-off-by: Miroslav Lichvar Signed-off-by: John Stultz diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 69185ae..10349d5 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -997,7 +997,7 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, err = kc->clock_adj(which_clock, &ktx); - if (!err && copy_to_user(utx, &ktx, sizeof(ktx))) + if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx))) return -EFAULT; return err; -- cgit v0.10.2 From 31ade30692dc9680bfc95700d794818fa3f754ac Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 16 Jan 2013 00:09:47 +0800 Subject: timekeeping: Add persistent_clock_exist flag In current kernel, there are several places which need to check whether there is a persistent clock for the platform. Current check is done by calling the read_persistent_clock() and validating its return value. So one optimization is to do the check only once in timekeeping_init(), and use a flag persistent_clock_exist to record it. v2: Add a has_persistent_clock() helper function, as suggested by John. 
Cc: Thomas Gleixner Cc: John Stultz Signed-off-by: Feng Tang Signed-off-by: John Stultz diff --git a/include/linux/time.h b/include/linux/time.h index 0015aea..dfbc4e8 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -115,6 +115,12 @@ static inline bool timespec_valid_strict(const struct timespec *ts) return true; } +extern bool persistent_clock_exist; +static inline bool has_persistent_clock(void) +{ + return persistent_clock_exist; +} + extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dfc7f87..b7a5841 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -28,6 +28,9 @@ static struct timekeeper timekeeper; /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; +/* Flag for if there is a persistent clock on this platform */ +bool __read_mostly persistent_clock_exist = false; + static inline void tk_normalize_xtime(struct timekeeper *tk) { while (tk->xtime_nsec >= ((u64)NSEC_PER_SEC << tk->shift)) { @@ -609,12 +612,14 @@ void __init timekeeping_init(void) struct timespec now, boot, tmp; read_persistent_clock(&now); + if (!timespec_valid_strict(&now)) { pr_warn("WARNING: Persistent clock returned invalid value!\n" " Check your CMOS/BIOS settings.\n"); now.tv_sec = 0; now.tv_nsec = 0; - } + } else if (now.tv_sec || now.tv_nsec) + persistent_clock_exist = true; read_boot_clock(&boot); if (!timespec_valid_strict(&boot)) { @@ -687,11 +692,12 @@ void timekeeping_inject_sleeptime(struct timespec *delta) { struct timekeeper *tk = &timekeeper; unsigned long flags; - struct timespec ts; - /* Make sure we don't set the clock twice */ - read_persistent_clock(&ts); - if (!(ts.tv_sec == 0 && ts.tv_nsec == 0)) + /* + * Make sure we don't set the clock twice, as timekeeping_resume() + * already did it + */ + if (has_persistent_clock()) 
return; write_seqlock_irqsave(&tk->lock, flags); -- cgit v0.10.2 From 9ecf37eb7a81e3295a1b274eafb6f83d7d2cabf0 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 16 Jan 2013 00:09:48 +0800 Subject: rtc: Skip the suspend/resume handling if persistent clock exist MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the RTC suspend and resume functions are to compensate the sleep time, but this is already done in timekeeping.c if a persistent clock exists. Cc: Thomas Gleixner Cc: John Stultz Cc: Alessandro Zummo Cc: Arve Hjønnevåg Signed-off-by: Feng Tang Signed-off-by: John Stultz diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c index f8a0aab..c01773f 100644 --- a/drivers/rtc/class.c +++ b/drivers/rtc/class.c @@ -50,6 +50,10 @@ static int rtc_suspend(struct device *dev, pm_message_t mesg) struct rtc_device *rtc = to_rtc_device(dev); struct rtc_time tm; struct timespec delta, delta_delta; + + if (has_persistent_clock()) + return 0; + if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; @@ -88,6 +92,9 @@ static int rtc_resume(struct device *dev) struct timespec new_system, new_rtc; struct timespec sleep_time; + if (has_persistent_clock()) + return 0; + rtc_hctosys_ret = -ENODEV; if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0) return 0; -- cgit v0.10.2 From 05ad717c77b1b8e98a1dd768c3700036d634629e Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 16 Jan 2013 00:09:49 +0800 Subject: timekeeping: Add CONFIG_HAS_PERSISTENT_CLOCK option Make the persistent clock check a kernel config option, so that some platforms can explicitly select it; also make CONFIG_RTC_HCTOSYS and RTC_SYSTOHC depend on its non-existence, which could prevent the persistent clock and RTC code from doing similar things twice during the system's init/suspend/resume phases. If CONFIG_HAS_PERSISTENT_CLOCK=n, nothing changes for the kernel, which still does the persistent clock check in timekeeping_init(). 
Cc: Thomas Gleixner Suggested-by: John Stultz Signed-off-by: Feng Tang [jstultz: Added dependency for RTC_SYSTOHC as well] Signed-off-by: John Stultz diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index b377e96..05761de 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -20,6 +20,7 @@ if RTC_CLASS config RTC_HCTOSYS bool "Set system time from RTC on startup and resume" default y + depends on !HAS_PERSISTENT_CLOCK help If you say yes here, the system time (wall clock) will be set using the value read from a specified RTC device. This is useful to avoid @@ -28,6 +29,7 @@ config RTC_HCTOSYS config RTC_SYSTOHC bool "Set the RTC time based on NTP synchronization" default y + depends on !HAS_PERSISTENT_CLOCK help If you say yes here, the system time (wall clock) will be stored in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11 diff --git a/include/linux/time.h b/include/linux/time.h index dfbc4e8..369b6e3 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -116,10 +116,15 @@ static inline bool timespec_valid_strict(const struct timespec *ts) } extern bool persistent_clock_exist; + +#ifdef CONFIG_HAS_PERSISTENT_CLOCK +#define has_persistent_clock() true +#else static inline bool has_persistent_clock(void) { return persistent_clock_exist; } +#endif extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 8601f0d..f7e45b9 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -12,6 +12,11 @@ config CLOCKSOURCE_WATCHDOG config ARCH_CLOCKSOURCE_DATA bool +# Platforms has a persistent clock +config HAS_PERSISTENT_CLOCK + bool + default n + # Timekeeping vsyscall support config GENERIC_TIME_VSYSCALL bool -- cgit v0.10.2 From e90c83f757fffdacec8b3c5eee5617dcc038338f Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 15 Jan 2013 19:45:19 +0000 Subject: x86: Select HAS_PERSISTENT_CLOCK on x86 Select 
HAS_PERSISTENT_CLOCK on x86 to simplify RTC options and allow the compiler to remove unused code. Signed-off-by: John Stultz diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46c3bff..a4135b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -108,6 +108,7 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_RCU_USER_QS if X86_64 select HAVE_IRQ_TIME_ACCOUNTING + select HAS_PERSISTENT_CLOCK select GENERIC_KERNEL_THREAD select GENERIC_KERNEL_EXECVE select MODULES_USE_ELF_REL if X86_32 -- cgit v0.10.2 From 6125bc8b86d9da75ddac77e38f41afbf9f5de3e3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 4 Jan 2013 15:41:47 -0700 Subject: x86/time/rtc: Don't print extended CMOS year when reading RTC We shouldn't print the current century every time we read the RTC. Signed-off-by: Bjorn Helgaas Acked-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20130104224146.15189.14874.stgit@bhelgaas.mtv.corp.google.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 801602b..2e8f3d3 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void) if (century) { century = bcd2bin(century); year += century * 100; - printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); } else year += CMOS_YEARS_OFFS; -- cgit v0.10.2 From 6f16eebe1ff82176339a0439c98ebec9768b0ee2 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 25 Jan 2013 17:08:12 -0800 Subject: timekeeping: Switch HAS_PERSISTENT_CLOCK to ALWAYS_USE_PERSISTENT_CLOCK Jason pointed out the HAS_PERSISTENT_CLOCK name isn't quite accurate for the config, as some systems may have the persistent_clock in some cases, but not always. So change the config name to the more clear ALWAYS_USE_PERSISTENT_CLOCK. 
Signed-off-by: John Stultz diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a4135b5..335da90 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -108,7 +108,7 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_RCU_USER_QS if X86_64 select HAVE_IRQ_TIME_ACCOUNTING - select HAS_PERSISTENT_CLOCK + select ALWAYS_USE_PERSISTENT_CLOCK select GENERIC_KERNEL_THREAD select GENERIC_KERNEL_EXECVE select MODULES_USE_ELF_REL if X86_32 diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 05761de..da60de0 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -20,7 +20,7 @@ if RTC_CLASS config RTC_HCTOSYS bool "Set system time from RTC on startup and resume" default y - depends on !HAS_PERSISTENT_CLOCK + depends on !ALWAYS_USE_PERSISTENT_CLOCK help If you say yes here, the system time (wall clock) will be set using the value read from a specified RTC device. This is useful to avoid @@ -29,7 +29,7 @@ config RTC_HCTOSYS config RTC_SYSTOHC bool "Set the RTC time based on NTP synchronization" default y - depends on !HAS_PERSISTENT_CLOCK + depends on !ALWAYS_USE_PERSISTENT_CLOCK help If you say yes here, the system time (wall clock) will be stored in the RTC specified by RTC_HCTOSYS_DEVICE approximately every 11 diff --git a/include/linux/time.h b/include/linux/time.h index 369b6e3..476e1d7 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -117,7 +117,7 @@ static inline bool timespec_valid_strict(const struct timespec *ts) extern bool persistent_clock_exist; -#ifdef CONFIG_HAS_PERSISTENT_CLOCK +#ifdef CONFIG_ALWAYS_USE_PERSISTENT_CLOCK #define has_persistent_clock() true #else static inline bool has_persistent_clock(void) diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index f7e45b9..0dddb9d 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -13,7 +13,7 @@ config ARCH_CLOCKSOURCE_DATA bool # Platforms has a persistent clock -config HAS_PERSISTENT_CLOCK +config ALWAYS_USE_PERSISTENT_CLOCK bool default n -- cgit v0.10.2 From 
12572dbb53638c6e454ef831c8fee7de3df24389 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Jan 2013 17:05:21 +0000 Subject: clockevents: Add generic timer broadcast receiver Currently the broadcast mechanism used for timers is abstracted by a function pointer on struct clock_event_device. As the fundamental mechanism for broadcast is architecture-specific, this ties each clock_event_device driver to a single architecture, even where the driver is otherwise generic. This patch adds a standard path for the receipt of timer broadcasts, so drivers and/or architecture backends need not manage redundant lists of timers for the purpose of routing broadcast timer ticks. [tglx: Made the implementation depend on the config switch as well ] Signed-off-by: Mark Rutland Reviewed-by: Santosh Shilimkar Cc: linux-arm-kernel@lists.infradead.org Cc: nico@linaro.org Cc: Will.Deacon@arm.com Cc: Marc.Zyngier@arm.com Cc: john.stultz@linaro.org Link: http://lkml.kernel.org/r/1358183124-28461-2-git-send-email-mark.rutland@arm.com Tested-by: Santosh Shilimkar Reviewed-by: Stephen Boyd Signed-off-by: Thomas Gleixner diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 8a7096f..e1089aa 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -161,6 +161,10 @@ clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec) extern void clockevents_suspend(void); extern void clockevents_resume(void); +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern int tick_receive_broadcast(void); +#endif + #ifdef CONFIG_GENERIC_CLOCKEVENTS extern void clockevents_notify(unsigned long reason, void *arg); #else diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f113755..7cc81c5 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -125,6 +125,23 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) return ret; } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +int 
tick_receive_broadcast(void) +{ + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + struct clock_event_device *evt = td->evtdev; + + if (!evt) + return -ENODEV; + + if (!evt->event_handler) + return -EINVAL; + + evt->event_handler(evt); + return 0; +} +#endif + /* * Broadcast the event to the cpus, which are set in the mask (mangled). */ -- cgit v0.10.2 From 12ad10004645d38356b14d1fbba379c523a61916 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Jan 2013 17:05:22 +0000 Subject: clockevents: Add generic timer broadcast function Currently, the timer broadcast mechanism is defined by a function pointer on struct clock_event_device. As the fundamental mechanism for broadcast is architecture-specific, this means that clock_event_device drivers cannot be shared across multiple architectures. This patch adds an (optional) architecture-specific function for timer tick broadcast, allowing drivers which may require broadcast functionality to be shared across multiple architectures. 
Signed-off-by: Mark Rutland Reviewed-by: Santosh Shilimkar Cc: linux-arm-kernel@lists.infradead.org Cc: nico@linaro.org Cc: Will.Deacon@arm.com Cc: Marc.Zyngier@arm.com Cc: john.stultz@linaro.org Link: http://lkml.kernel.org/r/1358183124-28461-3-git-send-email-mark.rutland@arm.com Tested-by: Santosh Shilimkar Reviewed-by: Stephen Boyd Signed-off-by: Thomas Gleixner diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index e1089aa..6634652 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -162,6 +162,11 @@ extern void clockevents_suspend(void); extern void clockevents_resume(void); #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST +extern void tick_broadcast(const struct cpumask *mask); +#else +#define tick_broadcast NULL +#endif extern int tick_receive_broadcast(void); #endif diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 8601f0d..b696922 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -38,6 +38,10 @@ config GENERIC_CLOCKEVENTS_BUILD default y depends on GENERIC_CLOCKEVENTS +# Architecture can handle broadcast in a driver-agnostic way +config ARCH_HAS_TICK_BROADCAST + bool + # Clockevents broadcasting infrastructure config GENERIC_CLOCKEVENTS_BROADCAST bool diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 7cc81c5..f726537 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "tick-internal.h" @@ -86,6 +87,11 @@ int tick_is_broadcast_device(struct clock_event_device *dev) return (dev && tick_broadcast_device.evtdev == dev); } +static void err_broadcast(const struct cpumask *mask) +{ + pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n"); +} + /* * Check, if the device is disfunctional and a place holder, which * needs to be handled by the broadcast device. 
@@ -105,6 +111,13 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) */ if (!tick_device_is_functional(dev)) { dev->event_handler = tick_handle_periodic; + if (!dev->broadcast) + dev->broadcast = tick_broadcast; + if (!dev->broadcast) { + pr_warn_once("%s depends on broadcast, but no broadcast function available\n", + dev->name); + dev->broadcast = err_broadcast; + } cpumask_set_cpu(cpu, tick_get_broadcast_mask()); tick_broadcast_start_periodic(tick_broadcast_device.evtdev); ret = 1; -- cgit v0.10.2 From b22affe0aef429d657bc6505aacb1c569340ddd2 Mon Sep 17 00:00:00 2001 From: Leonid Shatz Date: Mon, 4 Feb 2013 14:33:37 +0200 Subject: hrtimer: Prevent hrtimer_enqueue_reprogram race hrtimer_enqueue_reprogram contains a race which could result in timer.base switch during unlock/lock sequence. hrtimer_enqueue_reprogram is releasing the lock protecting the timer base for calling raise_softirq_irqsoff() due to a lock ordering issue versus rq->lock. If during that time another CPU calls __hrtimer_start_range_ns() on the same hrtimer, the timer base might switch, before the current CPU can lock base->lock again and therefor the unlock_timer_base() call will unlock the wrong lock. [ tglx: Added comment and massaged changelog ] Signed-off-by: Leonid Shatz Signed-off-by: Izik Eidus Cc: Andrea Arcangeli Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1359981217-389-1-git-send-email-izik.eidus@ravellosystems.com Signed-off-by: Thomas Gleixner diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 6db7a5e..cdd5607 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -640,21 +640,9 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) * and expiry check is done in the hrtimer_interrupt or in the softirq. 
*/ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base, - int wakeup) + struct hrtimer_clock_base *base) { - if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { - if (wakeup) { - raw_spin_unlock(&base->cpu_base->lock); - raise_softirq_irqoff(HRTIMER_SOFTIRQ); - raw_spin_lock(&base->cpu_base->lock); - } else - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); - - return 1; - } - - return 0; + return base->cpu_base->hres_active && hrtimer_reprogram(timer, base); } static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) @@ -735,8 +723,7 @@ static inline int hrtimer_switch_to_hres(void) { return 0; } static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base, int skip_equal) { } static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, - struct hrtimer_clock_base *base, - int wakeup) + struct hrtimer_clock_base *base) { return 0; } @@ -995,8 +982,21 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * * XXX send_remote_softirq() ? */ - if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) - hrtimer_enqueue_reprogram(timer, new_base, wakeup); + if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases) + && hrtimer_enqueue_reprogram(timer, new_base)) { + if (wakeup) { + /* + * We need to drop cpu_base->lock to avoid a + * lock ordering issue vs. rq->lock. + */ + raw_spin_unlock(&new_base->cpu_base->lock); + raise_softirq_irqoff(HRTIMER_SOFTIRQ); + local_irq_restore(flags); + return ret; + } else { + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + } unlock_hrtimer_base(timer, &flags); -- cgit v0.10.2 From 84e345e4e209cbe796c88fa2ad1732d7121ec100 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Fri, 8 Feb 2013 17:59:53 -0500 Subject: time, Fix setting of hardware clock in NTP code At init time, if the system time is "warped" forward in warp_clock() it will differ from the hardware clock by sys_tz.tz_minuteswest. 
This time difference is not taken into account when ntp updates the hardware clock, and this causes the system time to jump forward by this offset every reboot. The kernel must take this offset into account when writing the system time to the hardware clock in the ntp code. This patch adds persistent_clock_is_local which indicates that an offset has been applied in warp_clock() and accounts for the "warp" before writing the hardware clock. x86 does not have this problem as rtc writes are software limited to a +/-15 minute window relative to the current rtc time. Other arches, such as powerpc, however do a full synchronization of the system time to the rtc and will see this problem. [v2]: generated against tip/timers/core Signed-off-by: Prarit Bhargava Cc: John Stultz Cc: Thomas Gleixner Signed-off-by: John Stultz diff --git a/include/linux/time.h b/include/linux/time.h index 476e1d7..a3ab6a8 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -128,6 +128,7 @@ static inline bool has_persistent_clock(void) extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); +extern int persistent_clock_is_local; extern int update_persistent_clock(struct timespec now); void timekeeping_init(void); extern int timekeeping_suspended; diff --git a/kernel/time.c b/kernel/time.c index d226c6a..c2a27dd 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -115,6 +115,12 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, } /* + * Indicates if there is an offset between the system clock and the hardware + * clock/persistent clock/rtc. + */ +int persistent_clock_is_local; + +/* * Adjust the time obtained from the CMOS to be UTC time instead of * local time. 
* @@ -135,6 +141,8 @@ static inline void warp_clock(void) struct timespec adjust; adjust = current_kernel_time(); + if (sys_tz.tz_minuteswest != 0) + persistent_clock_is_local = 1; adjust.tv_sec += sys_tz.tz_minuteswest * 60; do_settimeofday(&adjust); } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 313b161..b10a42b 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -511,13 +511,17 @@ static void sync_cmos_clock(struct work_struct *work) getnstimeofday(&now); if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) { + struct timespec adjust = now; + fail = -ENODEV; + if (persistent_clock_is_local) + adjust.tv_sec -= (sys_tz.tz_minuteswest * 60); #ifdef CONFIG_GENERIC_CMOS_UPDATE - fail = update_persistent_clock(now); + fail = update_persistent_clock(adjust); #endif #ifdef CONFIG_RTC_SYSTOHC if (fail == -ENODEV) - fail = rtc_set_ntp_time(now); + fail = rtc_set_ntp_time(adjust); #endif } -- cgit v0.10.2 From 5d1d9a29bc0772abee765f09513779a2ef0ebbfd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 8 Feb 2013 15:24:07 +0000 Subject: clockevents: Fix generic broadcast for FEAT_C3STOP Commit 12ad100046: "clockevents: Add generic timer broadcast function" made tick_device_uses_broadcast set up the generic broadcast function for dummy devices (where !tick_device_is_functional(dev)), but neglected to set up the broadcast function for devices that stop in low power states (with the CLOCK_EVT_FEAT_C3STOP flag). When these devices enter low power states they will not have the generic broadcast function assigned, and will bring down the system when an attempt is made to broadcast to them. This patch ensures that the broadcast function is also assigned for devices which require broadcast in low power states. 
Reported-by: Stephen Warren Signed-off-by: Mark Rutland Tested-by: Stephen Warren Cc: linux-arm-kernel@lists.infradead.org Cc: nico@linaro.org Cc: Marc.Zyngier@arm.com Cc: Will.Deacon@arm.com Cc: santosh.shilimkar@ti.com Cc: john.stultz@linaro.org Signed-off-by: Thomas Gleixner diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index f726537..2fb8cb8 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -92,6 +92,17 @@ static void err_broadcast(const struct cpumask *mask) pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n"); } +static void tick_device_setup_broadcast_func(struct clock_event_device *dev) +{ + if (!dev->broadcast) + dev->broadcast = tick_broadcast; + if (!dev->broadcast) { + pr_warn_once("%s depends on broadcast, but no broadcast function available\n", + dev->name); + dev->broadcast = err_broadcast; + } +} + /* * Check, if the device is disfunctional and a place holder, which * needs to be handled by the broadcast device. 
@@ -111,13 +122,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) */ if (!tick_device_is_functional(dev)) { dev->event_handler = tick_handle_periodic; - if (!dev->broadcast) - dev->broadcast = tick_broadcast; - if (!dev->broadcast) { - pr_warn_once("%s depends on broadcast, but no broadcast function available\n", - dev->name); - dev->broadcast = err_broadcast; - } + tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_get_broadcast_mask()); tick_broadcast_start_periodic(tick_broadcast_device.evtdev); ret = 1; @@ -129,9 +134,10 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) */ if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) { int cpu = smp_processor_id(); - cpumask_clear_cpu(cpu, tick_get_broadcast_mask()); tick_broadcast_clear_oneshot(cpu); + } else { + tick_device_setup_broadcast_func(dev); } } raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); -- cgit v0.10.2 From e6c42c295e071dd74a66b5a9fcf4f44049888ed8 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 15 Feb 2013 11:08:11 +0100 Subject: posix-cpu-timers: Fix nanosleep task_struct leak The trinity fuzzer triggered a task_struct reference leak via clock_nanosleep with CPU_TIMERs. do_cpu_nanosleep() calls posix_cpu_timer_create(), but misses a corresponding posix_cpu_timer_del() which leads to the task_struct reference leak. Reported-and-tested-by: Tommi Rantala Signed-off-by: Stanislaw Gruszka Cc: Dave Jones Cc: John Stultz Cc: Oleg Nesterov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20130215100810.GF4392@redhat.com Signed-off-by: Thomas Gleixner diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index a278cad..942ca27 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1401,8 +1401,10 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, while (!signal_pending(current)) { if (timer.it.cpu.expires.sched == 0) { /* - * Our timer fired and was reset.
+ * Our timer fired and was reset, below + * deletion can not fail. */ + posix_cpu_timer_del(&timer); spin_unlock_irq(&timer.it_lock); return 0; } @@ -1420,9 +1422,26 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, * We were interrupted by a signal. */ sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp); - posix_cpu_timer_set(&timer, 0, &zero_it, it); + error = posix_cpu_timer_set(&timer, 0, &zero_it, it); + if (!error) { + /* + * Timer is now unarmed, deletion can not fail. + */ + posix_cpu_timer_del(&timer); + } spin_unlock_irq(&timer.it_lock); + while (error == TIMER_RETRY) { + /* + * We need to handle case when timer was or is in the + * middle of firing. In other cases we already freed + * resources. + */ + spin_lock_irq(&timer.it_lock); + error = posix_cpu_timer_del(&timer); + spin_unlock_irq(&timer.it_lock); + } + if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) { /* * It actually did fire already. -- cgit v0.10.2 From 36dfbbf136db0d645bacfd42ce7d9d6928ea532d Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Fri, 15 Feb 2013 16:58:14 +0900 Subject: timers/x86/hpet: Use HPET_COUNTER to specify the hpet counter in vread_hpet() vread_hpet() uses "0xf0" as the offset of the hpet counter. To clarify the meaning of this code, it should use symbolic name, HPET_COUNTER, instead. Signed-off-by: Satoru Takeuchi Cc: H. Peter Anvin Cc: "H. Peter Anvin" Signed-off-by: Ingo Molnar diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 205ad32..c74436e 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -60,7 +60,7 @@ notrace static cycle_t vread_tsc(void) static notrace cycle_t vread_hpet(void) { - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); + return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); } #ifdef CONFIG_PARAVIRT_CLOCK -- cgit v0.10.2