From db5e7ecc4abc91b9f26f0c0d79ef88a51e987d90 Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 9 Jun 2011 08:40:59 -0400
Subject: tracing: Fix regression in printk_formats file

The fix for the printk_formats of modules broke the printk_formats of
trace_printks in the kernel.

The update of what to show via the seq_file was only done if the
passed-in fmt was NULL, which happens only on the first iteration. The
result was showing the first format every time instead of iterating
through the available formats.

Signed-off-by: Steven Rostedt

diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index dff763b..1f06468 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -240,13 +240,10 @@ static const char **find_next(void *v, loff_t *pos)
 	const char **fmt = v;
 	int start_index;
 
-	if (!fmt)
-		fmt = __start___trace_bprintk_fmt + *pos;
-
 	start_index = __stop___trace_bprintk_fmt - __start___trace_bprintk_fmt;
 
 	if (*pos < start_index)
-		return fmt;
+		return __start___trace_bprintk_fmt + *pos;
 
 	return find_next_mod_format(start_index, v, fmt, pos);
 }

--
cgit v0.10.2
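The bug is an instance of a general seq_file pitfall: the ->next() callback
runs once per element, so the element must be recomputed from *pos on every
call, not only when no element has been handed out yet. A minimal sketch of
a correct iterator over a static table follows — the names are hypothetical
and not part of the patch:

#include <linux/kernel.h>
#include <linux/seq_file.h>

static const char *items[] = { "first", "second", "third" };

/* ->start() may be called repeatedly with a saved *pos; derive v from it. */
static void *example_start(struct seq_file *m, loff_t *pos)
{
	return *pos < ARRAY_SIZE(items) ? (void *)&items[*pos] : NULL;
}

/*
 * ->next() must advance *pos and recompute the element each time.
 * The broken pattern fixed above ("if (!v) v = table + *pos;") keeps
 * returning the first element forever.
 */
static void *example_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	return *pos < ARRAY_SIZE(items) ? (void *)&items[*pos] : NULL;
}

static void example_stop(struct seq_file *m, void *v)
{
}

static int example_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%s\n", *(const char **)v);
	return 0;
}

static const struct seq_operations example_seq_ops = {
	.start	= example_start,
	.next	= example_next,
	.stop	= example_stop,
	.show	= example_show,
};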
From 1123d93963cbd2546449d4d9f0c568e323cb0ac6 Mon Sep 17 00:00:00 2001
From: Max Asbock
Date: Mon, 13 Jun 2011 10:18:32 -0700
Subject: timerfd: Fix wakeup of processes when timer is cancelled on clock change

Currently, processes waiting with poll on cancelable timerfd timers are
not woken up when the timers are canceled. When the system time is set,
the clock_was_set() function calls timerfd_clock_was_set() to cancel
and wake up processes waiting on potentially cancelable timerfd timers.
However, the wakeup currently has no effect: for timerfd_read() it
depends on ctx->ticks not being 0, and timerfd_poll() likewise requires
ctx->ticks to be non-zero. As a consequence, processes waiting on
cancelable timers only get woken up when the timers expire.

Fix this by incrementing ctx->ticks before calling wake_up().

Signed-off-by: Max Asbock
Cc: kay.sievers@vrfy.org
Cc: virtuoso@slind.org
Cc: johnstul
Link: http://lkml.kernel.org/r/1307985512.4710.41.camel@w-amax.beaverton.ibm.com
Signed-off-by: Thomas Gleixner

diff --git a/fs/timerfd.c b/fs/timerfd.c
index f67acbd..dffeb37 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -61,7 +61,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
 
 /*
  * Called when the clock was set to cancel the timers in the cancel
- * list.
+ * list. This will wake up processes waiting on these timers. The
+ * wake-up requires ctx->ticks to be non zero, therefore we increment
+ * it before calling wake_up_locked().
  */
 void timerfd_clock_was_set(void)
 {
@@ -76,6 +78,7 @@ void timerfd_clock_was_set(void)
 		spin_lock_irqsave(&ctx->wqh.lock, flags);
 		if (ctx->moffs.tv64 != moffs.tv64) {
 			ctx->moffs.tv64 = KTIME_MAX;
+			ctx->ticks++;
 			wake_up_locked(&ctx->wqh);
 		}
 		spin_unlock_irqrestore(&ctx->wqh.lock, flags);

--
cgit v0.10.2
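The fixed behaviour is visible from user space with a cancel-on-set timer.
A sketch, assuming the headers expose TFD_TIMER_CANCEL_ON_SET (older
userspace may need the constant defined by hand, as below): setting the
system clock from another shell should now wake the poll() immediately and
make read() fail with ECANCELED:

#include <sys/timerfd.h>
#include <poll.h>
#include <time.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <errno.h>

#ifndef TFD_TIMER_CANCEL_ON_SET
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)	/* from linux/timerfd.h */
#endif

int main(void)
{
	struct itimerspec its = { { 0, 0 }, { 0, 0 } };
	struct pollfd pfd;
	uint64_t ticks;
	int fd;

	fd = timerfd_create(CLOCK_REALTIME, 0);
	if (fd < 0)
		return 1;

	/* Absolute timer an hour out; cancelled if CLOCK_REALTIME is set. */
	clock_gettime(CLOCK_REALTIME, &its.it_value);
	its.it_value.tv_sec += 3600;
	if (timerfd_settime(fd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET,
			    &its, NULL) < 0)
		return 1;

	pfd.fd = fd;
	pfd.events = POLLIN;

	/* Before the fix, a clock change did not wake this poll(). */
	if (poll(&pfd, 1, -1) == 1) {
		if (read(fd, &ticks, sizeof(ticks)) < 0 && errno == ECANCELED)
			printf("timer cancelled by clock change\n");
		else
			printf("timer expired\n");
	}
	return 0;
}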
From 203db2952bc87f5d610c9ad53a7d02b85897721f Mon Sep 17 00:00:00 2001
From: Mathias Krause
Date: Wed, 15 Jun 2011 23:03:38 +0200
Subject: tools/perf: Fix static build of perf tool

To build a statically linked version of the perf tool, all needed
libraries must be added in the correct order to get the symbols
resolved. Currently this is broken when, e.g., python or newt support
is enabled -- libpython needs libpthread, which is an unconditional
link dependency of the perf tool; libslang needs libm, another
unconditional dependency.

To solve the problem in the long run, without the need to keep track
of transitive library dependencies, simply make the linker look at
EXTLIBS multiple times until it has all symbols resolved.

Signed-off-by: Mathias Krause
Cc: Peter Zijlstra
Cc: Paul Mackerras
Cc: Arnaldo Carvalho de Melo
Link: http://lkml.kernel.org/r/1308171818-20370-1-git-send-email-minipli@googlemail.com
Signed-off-by: Ingo Molnar

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 032ba63..940257b 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -633,7 +633,7 @@ prefix_SQ = $(subst ','\'',$(prefix))
 
 SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
 
-LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive $(EXTLIBS)
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
 
 ALL_CFLAGS += $(BASIC_CFLAGS)
 ALL_CFLAGS += $(ARCH_CFLAGS)

--
cgit v0.10.2

From b5199515c25cca622495eb9c6a8a1d275e775088 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 16 Jun 2011 16:22:08 +0200
Subject: clocksource: Make watchdog robust vs. interruption

The clocksource watchdog code is interruptible, and it has been
observed that this can trigger false positives which disable the TSC.

The reason is that an interrupt storm or a long-running interrupt
handler between the read of the watchdog source and the read of the
TSC brings the two far enough apart that the delta is larger than the
unstable threshold. Move both reads into a short interrupt-disabled
region to avoid that.

Reported-and-tested-by: Vernon Mauery
Signed-off-by: Thomas Gleixner
Cc: stable@kernel.org

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index d4646b4..18a1baf 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -188,6 +188,7 @@ struct clocksource {
 #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
 	/* Watchdog related data, used by the framework */
 	struct list_head wd_list;
+	cycle_t cs_last;
 	cycle_t wd_last;
 #endif
 } ____cacheline_aligned;

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1c95fd6..e0980f0 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -185,7 +185,6 @@ static struct clocksource *watchdog;
 static struct timer_list watchdog_timer;
 static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
 static DEFINE_SPINLOCK(watchdog_lock);
-static cycle_t watchdog_last;
 static int watchdog_running;
 
 static int clocksource_watchdog_kthread(void *data);
@@ -254,11 +253,6 @@ static void clocksource_watchdog(unsigned long data)
 	if (!watchdog_running)
 		goto out;
 
-	wdnow = watchdog->read(watchdog);
-	wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask,
-				     watchdog->mult, watchdog->shift);
-	watchdog_last = wdnow;
-
 	list_for_each_entry(cs, &watchdog_list, wd_list) {
 
 		/* Clocksource already marked unstable? */
@@ -268,19 +262,28 @@ static void clocksource_watchdog(unsigned long data)
 			continue;
 		}
 
+		local_irq_disable();
 		csnow = cs->read(cs);
+		wdnow = watchdog->read(watchdog);
+		local_irq_enable();
 
 		/* Clocksource initialized ? */
 		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG)) {
 			cs->flags |= CLOCK_SOURCE_WATCHDOG;
-			cs->wd_last = csnow;
+			cs->wd_last = wdnow;
+			cs->cs_last = csnow;
 			continue;
 		}
 
-		/* Check the deviation from the watchdog clocksource. */
-		cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) &
+		wd_nsec = clocksource_cyc2ns((wdnow - cs->wd_last) & watchdog->mask,
+					     watchdog->mult, watchdog->shift);
+
+		cs_nsec = clocksource_cyc2ns((csnow - cs->cs_last) &
 					     cs->mask, cs->mult, cs->shift);
-		cs->wd_last = csnow;
+		cs->cs_last = csnow;
+		cs->wd_last = wdnow;
+
+		/* Check the deviation from the watchdog clocksource. */
 		if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
 			clocksource_unstable(cs, cs_nsec - wd_nsec);
 			continue;
@@ -318,7 +321,6 @@ static inline void clocksource_start_watchdog(void)
 		return;
 	init_timer(&watchdog_timer);
 	watchdog_timer.function = clocksource_watchdog;
-	watchdog_last = watchdog->read(watchdog);
 	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
 	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
 	watchdog_running = 1;

--
cgit v0.10.2
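Reduced to its core, the new scheme pairs both clock reads inside one
interrupt-disabled window and keeps a last-snapshot per clock, so each delta
is measured over the same interval. A simplified sketch of that pattern,
using standalone statics instead of the per-clocksource fields the patch
adds (not the kernel's exact code):

static cycle_t cs_last, wd_last;

static s64 clock_skew_ns(struct clocksource *cs, struct clocksource *wd)
{
	cycle_t csnow, wdnow;
	s64 cs_nsec, wd_nsec;

	/*
	 * Back-to-back reads with interrupts off: an interrupt can no
	 * longer land between the two reads and inflate the apparent
	 * skew by its own runtime.
	 */
	local_irq_disable();
	csnow = cs->read(cs);
	wdnow = wd->read(wd);
	local_irq_enable();

	/* Each clock's delta is taken against its own last snapshot. */
	cs_nsec = clocksource_cyc2ns((csnow - cs_last) & cs->mask,
				     cs->mult, cs->shift);
	wd_nsec = clocksource_cyc2ns((wdnow - wd_last) & wd->mask,
				     wd->mult, wd->shift);
	cs_last = csnow;
	wd_last = wdnow;

	return cs_nsec - wd_nsec;	/* compare against WATCHDOG_THRESHOLD */
}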
From d8ad7d1123a960cc9f276bd499f9325c6f5e1bd1 Mon Sep 17 00:00:00 2001
From: Takao Indoh
Date: Tue, 29 Mar 2011 12:35:04 -0400
Subject: generic-ipi: Fix kexec boot crash by initializing call_single_queue before enabling interrupts

There is a problem that kdump (the 2nd kernel) sometimes hangs up due
to a pending IPI from the 1st kernel. A kernel panic occurs because the
IPI comes in before call_single_queue is initialized.

To fix the crash, rename init_call_single_data() to
call_function_init() and call it in start_kernel() so that
call_single_queue can be initialized before interrupts are enabled.

The details of the crash are:

(1) The 2nd kernel boots up.

(2) A pending IPI from the 1st kernel comes in when irqs are first
    enabled in start_kernel().

(3) The kernel tries to handle the interrupt, but call_single_queue
    is not initialized yet at this point. As a result, in
    generic_smp_call_function_single_interrupt(), a NULL pointer
    dereference occurs when list_replace_init() tries to access
    &q->list.next.

Therefore this patch changes the name of init_call_single_data() to
call_function_init() and calls it before local_irq_enable() in
start_kernel().

Signed-off-by: Takao Indoh
Reviewed-by: WANG Cong
Acked-by: Neil Horman
Acked-by: Vivek Goyal
Acked-by: Peter Zijlstra
Cc: Milton Miller
Cc: Jens Axboe
Cc: Paul E. McKenney
Cc: kexec@lists.infradead.org
Link: http://lkml.kernel.org/r/D6CBEE2F420741indou.takao@jp.fujitsu.com
Signed-off-by: Ingo Molnar

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 7ad824d..8cc38d3 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -85,12 +85,15 @@ int smp_call_function_any(const struct cpumask *mask,
  * Generic and arch helpers
  */
 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
+void __init call_function_init(void);
 void generic_smp_call_function_single_interrupt(void);
 void generic_smp_call_function_interrupt(void);
 void ipi_call_lock(void);
 void ipi_call_unlock(void);
 void ipi_call_lock_irq(void);
 void ipi_call_unlock_irq(void);
+#else
+static inline void call_function_init(void) { }
 #endif
 
 /*
@@ -134,7 +137,7 @@ static inline void smp_send_reschedule(int cpu) { }
 #define smp_prepare_boot_cpu()			do {} while (0)
 #define smp_call_function_many(mask, func, info, wait) \
 			(up_smp_call_function(func, info))
-static inline void init_call_single_data(void) { }
+static inline void call_function_init(void) { }
 
 static inline int
 smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,

diff --git a/init/main.c b/init/main.c
index cafba67..d7211fa 100644
--- a/init/main.c
+++ b/init/main.c
@@ -542,6 +542,7 @@ asmlinkage void __init start_kernel(void)
 	timekeeping_init();
 	time_init();
 	profile_init();
+	call_function_init();
 	if (!irqs_disabled())
 		printk(KERN_CRIT "start_kernel(): bug: interrupts were "
 		       "enabled early\n");

diff --git a/kernel/smp.c b/kernel/smp.c
index 73a1951..fb67dfa 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -74,7 +74,7 @@ static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
 	.notifier_call		= hotplug_cfd,
 };
 
-static int __cpuinit init_call_single_data(void)
+void __init call_function_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
 	int i;
@@ -88,10 +88,7 @@ static int __cpuinit init_call_single_data(void)
 
 	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
 	register_cpu_notifier(&hotplug_cfd_notifier);
-
-	return 0;
 }
-early_initcall(init_call_single_data);
 
 /*
  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources

--
cgit v0.10.2
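The failure mode is easy to reproduce in isolation: list_replace_init() on a
list head that was never initialized chases a NULL ->next pointer. A
user-space sketch of the crash, with a minimal list implementation mirroring
the kernel's (hypothetical and deliberately faulty):

#include <stddef.h>

struct list_head {
	struct list_head *next, *prev;
};

/* Mirrors the kernel's list_replace_init() */
static void list_replace_init(struct list_head *old, struct list_head *new)
{
	new->next = old->next;
	new->next->prev = new;		/* NULL dereference when old was
					   never initialized: next == NULL */
	new->prev = old->prev;
	new->prev->next = new;
	old->next = old->prev = old;	/* INIT_LIST_HEAD(old) */
}

int main(void)
{
	/* Zeroed, like call_single_queue before call_function_init(). */
	static struct list_head queue;
	struct list_head local;

	/* What the early IPI handler effectively does -- crashes here. */
	list_replace_init(&queue, &local);
	return 0;
}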
From 7d68dc3f1003a38948c55c803c32d1989dd49198 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst
Date: Tue, 14 Jun 2011 19:53:09 +0200
Subject: x86, efi: Do not reserve boot services regions within reserved areas

Commit 916f676f8dc started reserving boot service code since some
systems require you to keep that code around until SetVirtualAddressMap
is called.

However, in some cases those areas will overlap with reserved regions.
The proper medium-term fix is to fix the bootloader to prevent the
conflicts from occurring by moving the kernel to a better position, but
the kernel should check for this possibility, and only reserve regions
which can be reserved.

Signed-off-by: Maarten Lankhorst
Link: http://lkml.kernel.org/r/4DF7A005.1050407@gmail.com
Acked-by: Matthew Garrett
Signed-off-by: H. Peter Anvin
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
index 19ae14b..0cd3800 100644
--- a/arch/x86/include/asm/memblock.h
+++ b/arch/x86/include/asm/memblock.h
@@ -4,7 +4,6 @@
 #define ARCH_DISCARD_MEMBLOCK
 
 u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
-void memblock_x86_to_bootmem(u64 start, u64 end);
 
 void memblock_x86_reserve_range(u64 start, u64 end, char *name);
 void memblock_x86_free_range(u64 start, u64 end);
@@ -19,5 +18,6 @@ u64 memblock_x86_hole_size(u64 start, u64 end);
 u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
 u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
 u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
+bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
 
 #endif

diff --git a/arch/x86/mm/memblock.c b/arch/x86/mm/memblock.c
index aa11693..992da5e 100644
--- a/arch/x86/mm/memblock.c
+++ b/arch/x86/mm/memblock.c
@@ -8,7 +8,7 @@
 #include <linux/range.h>
 
 /* Check for already reserved areas */
-static bool __init check_with_memblock_reserved_size(u64 *addrp, u64 *sizep, u64 align)
+bool __init memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align)
 {
 	struct memblock_region *r;
 	u64 addr = *addrp, last;
@@ -59,7 +59,7 @@ u64 __init memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align)
 		if (addr >= ei_last)
 			continue;
 		*sizep = ei_last - addr;
-		while (check_with_memblock_reserved_size(&addr, sizep, align))
+		while (memblock_x86_check_reserved_size(&addr, sizep, align))
 			;
 
 		if (*sizep)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 0d3a4fa..474356b 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -310,14 +310,31 @@ void __init efi_reserve_boot_services(void)
 
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		efi_memory_desc_t *md = p;
-		unsigned long long start = md->phys_addr;
-		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+		u64 start = md->phys_addr;
+		u64 size = md->num_pages << EFI_PAGE_SHIFT;
 
 		if (md->type != EFI_BOOT_SERVICES_CODE &&
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
-
-		memblock_x86_reserve_range(start, start + size, "EFI Boot");
+		/* Only reserve where possible:
+		 * - Not within any already allocated areas
+		 * - Not over any memory area (really needed, if above?)
+		 * - Not within any part of the kernel
+		 * - Not the bios reserved area
+		 */
+		if ((start+size >= virt_to_phys(_text)
+				&& start <= virt_to_phys(_end)) ||
+		    !e820_all_mapped(start, start+size, E820_RAM) ||
+		    memblock_x86_check_reserved_size(&start, &size,
+						     1<<EFI_PAGE_SHIFT)) {
+			/* Could not reserve, skip it: */
+			md->num_pages = 0;
+			memblock_dbg(PFX "Could not reserve boot range "
+					"[0x%010llx-0x%010llx]\n",
+						start, start+size-1);
+		} else
+			memblock_x86_reserve_range(start, start+size,
+						   "EFI Boot");
 	}
 }
 
@@ -334,6 +351,10 @@ static void __init efi_free_boot_services(void)
 		    md->type != EFI_BOOT_SERVICES_DATA)
 			continue;
 
+		/* Could not reserve boot area */
+		if (!size)
+			continue;
+
 		free_bootmem_late(start, size);
 	}
 }

--
cgit v0.10.2
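What the patch adds boils down to an overlap test before each reservation,
with skipped regions flagged (num_pages = 0) so the later free pass ignores
them. A simplified, hypothetical sketch of that policy with standalone
types — not the kernel's memblock API, which additionally trims partial
overlaps:

#include <stdbool.h>

typedef unsigned long long u64;

struct range {
	u64 start, size;
};

static bool overlaps(u64 as, u64 ae, u64 bs, u64 be)
{
	return as < be && bs < ae;
}

/*
 * Reserve only where possible: the candidate must not overlap the
 * kernel image or any already-reserved range. On failure the caller
 * zeroes the region's size so the free pass skips it as well.
 */
static bool can_reserve(const struct range *cand,
			const struct range *resv, int nr_resv,
			u64 kernel_start, u64 kernel_end)
{
	u64 end = cand->start + cand->size;
	int i;

	if (overlaps(cand->start, end, kernel_start, kernel_end))
		return false;
	for (i = 0; i < nr_resv; i++)
		if (overlaps(cand->start, end,
			     resv[i].start, resv[i].start + resv[i].size))
			return false;
	return true;
}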
From c1f5c54b57341e872a9d375dccef7257f86033ef Mon Sep 17 00:00:00 2001
From: Ingo Molnar
Date: Sat, 18 Jun 2011 22:51:13 +0200
Subject: x86, MAINTAINERS: Add x86 MCE people

Announce the new x86 MCE infrastructure maintainers.

Acked-by: Borislav Petkov
Acked-by: Tony Luck
Acked-by: H. Peter Anvin
Acked-by: Thomas Gleixner
Link: http://lkml.kernel.org/n/tip-8hs7yob6wib4vblmrmbpbav4@git.kernel.org
Signed-off-by: Ingo Molnar

diff --git a/MAINTAINERS b/MAINTAINERS
index 502f2dd..b12b8c1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7007,6 +7007,13 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86.
 S:	Maintained
 F:	drivers/platform/x86
 
+X86 MCE INFRASTRUCTURE
+M:	Tony Luck
+M:	Borislav Petkov
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	arch/x86/kernel/cpu/mcheck/*
+
 XEN HYPERVISOR INTERFACE
 M:	Jeremy Fitzhardinge
 M:	Konrad Rzeszutek Wilk

--
cgit v0.10.2